// This file is a part of Julia. License is MIT: https://julialang.org/license // Lower intrinsics that expose subtarget information to the language. This makes it // possible to write code that changes behavior based on, e.g., the availability of // specific CPU features. // // The following intrinsics are supported: // - julia.cpu.have_fma.$typ: returns 1 if the platform supports hardware-accelerated FMA. // // Some of these intrinsics are overloaded, i.e., they are suffixed with a type name. // To extend support, make sure codegen (in intrinsics.cpp) knows how to emit them. // // XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig // instead of using the global target machine? #include "llvm-version.h" #include "passes.h" #include #include #include #include #include #include #include #include #include #include "jitlayers.h" #define DEBUG_TYPE "cpufeatures" using namespace llvm; STATISTIC(LoweredWithFMA, "Number of have_fma's that were lowered to true"); STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false"); extern JuliaOJIT *jl_ExecutionEngine; // whether this platform unconditionally (i.e. without needing multiversioning) supports FMA Optional always_have_fma(Function &intr) { auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); #if defined(_CPU_AARCH64_) return typ == "f32" || typ == "f64"; #else (void)typ; return {}; #endif } bool have_fma(Function &intr, Function &caller) { auto unconditional = always_have_fma(intr); if (unconditional.hasValue()) return unconditional.getValue(); auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); Attribute FSAttr = caller.getFnAttribute("target-features"); StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString(); SmallVector Features; FS.split(Features, ','); for (StringRef Feature : Features) #if defined _CPU_ARM_ if (Feature == "+vfp4") return typ == "f32" || typ == "f64"; else if (Feature == "+vfp4sp") return typ == "f32"; #else if (Feature == "+fma" || Feature == "+fma4") return typ == "f32" || typ == "f64"; #endif return false; } void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) { if (have_fma(intr, caller)) { ++LoweredWithFMA; I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1)); } else { ++LoweredWithoutFMA; I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0)); } return; } bool lowerCPUFeatures(Module &M) { SmallVector Materialized; for (auto &F: M.functions()) { auto FN = F.getName(); if (FN.startswith("julia.cpu.have_fma.")) { for (Use &U: F.uses()) { User *RU = U.getUser(); CallInst *I = cast(RU); lowerHaveFMA(F, *I->getParent()->getParent(), I); Materialized.push_back(I); } } } if (!Materialized.empty()) { for (auto I: Materialized) { I->eraseFromParent(); } #ifdef JL_VERIFY_PASSES assert(!verifyModule(M, &errs())); #endif return true; } else { return false; } } PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM) { if (lowerCPUFeatures(M)) { return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } namespace { struct CPUFeaturesLegacy : public ModulePass { static char ID; CPUFeaturesLegacy() : ModulePass(ID) {}; bool runOnModule(Module &M) { return lowerCPUFeatures(M); } }; char CPUFeaturesLegacy::ID = 0; static RegisterPass Y("CPUFeatures", "Lower calls to CPU feature testing intrinsics.", false, false); } Pass *createCPUFeaturesPass() { return new CPUFeaturesLegacy(); } extern "C" JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM) { unwrap(PM)->add(createCPUFeaturesPass()); }