https://github.com/halide/Halide
Tip revision: f9e4c7878385f43cf88cca23d5bd663233e9e7da authored by Steven Johnson on 27 April 2021, 19:14:54 UTC
Add support for dynamic tensors to hannk (#5942)
Add support for dynamic tensors to hannk (#5942)
Tip revision: f9e4c78
LLVM_Runtime_Linker.cpp
#include "LLVM_Runtime_Linker.h"
#include "Error.h"
#include "LLVM_Headers.h"
#include "Target.h"
namespace Halide {
using std::string;
using std::vector;
namespace {
// Parses the bitcode held in `buf` into a module owned by `context`.
// `id` names the buffer while parsing, appears in the error message if
// parsing fails, and becomes the resulting module's identifier.
std::unique_ptr<llvm::Module> parse_bitcode_file(llvm::StringRef buf, llvm::LLVMContext *context, const char *id) {
    const llvm::MemoryBufferRef named_buffer(buf, id);
    auto parsed = llvm::expectedToErrorOr(llvm::parseBitcodeFile(named_buffer, *context));
    if (!parsed) {
        internal_error << "Could not parse built-in bitcode file " << id
                       << " llvm error is " << parsed.getError() << "\n";
    }
    std::unique_ptr<llvm::Module> module = std::move(*parsed);
    module->setModuleIdentifier(id);
    return module;
}
// Declares (as extern "C") the byte array halide_internal_initmod_<mod> and
// its length halide_internal_initmod_<mod>_length, and defines
// get_initmod_<mod>(context), which hands those bytes to parse_bitcode_file
// using the stringized module name as the id.
#define DECLARE_INITMOD(mod) \
extern "C" unsigned char halide_internal_initmod_##mod[]; \
extern "C" int halide_internal_initmod_##mod##_length; \
std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *context) { \
llvm::StringRef sb = llvm::StringRef((const char *)halide_internal_initmod_##mod, \
halide_internal_initmod_##mod##_length); \
return parse_bitcode_file(sb, context, #mod); \
}
// Defines stand-in getters for a module that is not part of this build:
// get_initmod_<mod>(context, bool, bool) and get_initmod_<mod>_ll(context).
// Each reports a user_error; the trailing return yields an empty pointer.
// The two bool parameters are defaulted, so the first stand-in can be called
// either with just a context or with (context, bits_64, debug).
#define DECLARE_NO_INITMOD(mod) \
std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *, bool = false, bool = false) { \
user_error << "Halide was compiled without support for this target\n"; \
return std::unique_ptr<llvm::Module>(); \
} \
std::unique_ptr<llvm::Module> get_initmod_##mod##_ll(llvm::LLVMContext *) { \
user_error << "Halide was compiled without support for this target\n"; \
return std::unique_ptr<llvm::Module>(); \
}
// Defines get_initmod_<mod>(context, bits_64, debug), which forwards to one
// of four variant getters -- get_initmod_<mod>_64_debug, _64, _32_debug or
// _32 -- selected by the bits_64 and debug flags. Those four getters must
// already be declared where this macro is expanded.
#define DECLARE_CPP_INITMOD_LOOKUP(mod) \
std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *context, bool bits_64, bool debug) { \
if (bits_64) { \
if (debug) { \
return get_initmod_##mod##_64_debug(context); \
} else { \
return get_initmod_##mod##_64(context); \
} \
} else { \
if (debug) { \
return get_initmod_##mod##_32_debug(context); \
} else { \
return get_initmod_##mod##_32(context); \
} \
} \
}
// Declares the four embedded variants of a module (<mod>_32_debug,
// <mod>_64_debug, <mod>_32, <mod>_64) via DECLARE_INITMOD, then defines the
// get_initmod_<mod>(context, bits_64, debug) lookup that chooses among them.
#define DECLARE_CPP_INITMOD(mod) \
DECLARE_INITMOD(mod##_32_debug) \
DECLARE_INITMOD(mod##_64_debug) \
DECLARE_INITMOD(mod##_32) \
DECLARE_INITMOD(mod##_64) \
DECLARE_CPP_INITMOD_LOOKUP(mod)
// Declares a single embedded module named <mod>_ll, producing the getter
// get_initmod_<mod>_ll(context). There are no bits/debug variants.
#define DECLARE_LL_INITMOD(mod) \
DECLARE_INITMOD(mod##_ll)
// Universal CPP Initmods. Please keep sorted alphabetically.
// Each line expands to independent declarations and getter definitions, so
// the order here affects readability only.
DECLARE_CPP_INITMOD(alignment_128)
DECLARE_CPP_INITMOD(alignment_32)
DECLARE_CPP_INITMOD(alignment_64)
DECLARE_CPP_INITMOD(allocation_cache)
DECLARE_CPP_INITMOD(android_clock)
DECLARE_CPP_INITMOD(android_host_cpu_count)
DECLARE_CPP_INITMOD(android_io)
DECLARE_CPP_INITMOD(cache)
DECLARE_CPP_INITMOD(can_use_target)
DECLARE_CPP_INITMOD(cuda)
DECLARE_CPP_INITMOD(destructors)
DECLARE_CPP_INITMOD(device_interface)
DECLARE_CPP_INITMOD(errors)
DECLARE_CPP_INITMOD(fake_get_symbol)
DECLARE_CPP_INITMOD(fake_thread_pool)
DECLARE_CPP_INITMOD(float16_t)
DECLARE_CPP_INITMOD(fuchsia_clock)
DECLARE_CPP_INITMOD(fuchsia_host_cpu_count)
DECLARE_CPP_INITMOD(fuchsia_yield)
DECLARE_CPP_INITMOD(gpu_device_selection)
DECLARE_CPP_INITMOD(halide_buffer_t)
DECLARE_CPP_INITMOD(hexagon_cache_allocator)
DECLARE_CPP_INITMOD(hexagon_dma)
DECLARE_CPP_INITMOD(hexagon_dma_pool)
DECLARE_CPP_INITMOD(hexagon_host)
DECLARE_CPP_INITMOD(ios_io)
DECLARE_CPP_INITMOD(linux_clock)
DECLARE_CPP_INITMOD(linux_host_cpu_count)
DECLARE_CPP_INITMOD(linux_yield)
DECLARE_CPP_INITMOD(matlab)
DECLARE_CPP_INITMOD(metadata)
DECLARE_CPP_INITMOD(module_aot_ref_count)
DECLARE_CPP_INITMOD(module_jit_ref_count)
DECLARE_CPP_INITMOD(msan)
DECLARE_CPP_INITMOD(msan_stubs)
DECLARE_CPP_INITMOD(opencl)
DECLARE_CPP_INITMOD(openglcompute)
DECLARE_CPP_INITMOD(opengl_egl_context)
DECLARE_CPP_INITMOD(opengl_glx_context)
DECLARE_CPP_INITMOD(osx_clock)
DECLARE_CPP_INITMOD(osx_get_symbol)
DECLARE_CPP_INITMOD(osx_host_cpu_count)
DECLARE_CPP_INITMOD(osx_opengl_context)
DECLARE_CPP_INITMOD(osx_yield)
DECLARE_CPP_INITMOD(posix_allocator)
DECLARE_CPP_INITMOD(posix_clock)
DECLARE_CPP_INITMOD(posix_error_handler)
DECLARE_CPP_INITMOD(posix_get_symbol)
DECLARE_CPP_INITMOD(posix_io)
DECLARE_CPP_INITMOD(posix_print)
DECLARE_CPP_INITMOD(posix_threads)
DECLARE_CPP_INITMOD(posix_threads_tsan)
DECLARE_CPP_INITMOD(prefetch)
DECLARE_CPP_INITMOD(profiler)
DECLARE_CPP_INITMOD(profiler_inlined)
DECLARE_CPP_INITMOD(pseudostack)
DECLARE_CPP_INITMOD(qurt_allocator)
DECLARE_CPP_INITMOD(qurt_hvx)
DECLARE_CPP_INITMOD(qurt_hvx_vtcm)
DECLARE_CPP_INITMOD(qurt_init_fini)
DECLARE_CPP_INITMOD(qurt_threads)
DECLARE_CPP_INITMOD(qurt_threads_tsan)
DECLARE_CPP_INITMOD(qurt_yield)
DECLARE_CPP_INITMOD(runtime_api)
DECLARE_CPP_INITMOD(ssp)
DECLARE_CPP_INITMOD(to_string)
DECLARE_CPP_INITMOD(trace_helper)
DECLARE_CPP_INITMOD(tracing)
DECLARE_CPP_INITMOD(windows_clock)
DECLARE_CPP_INITMOD(windows_cuda)
DECLARE_CPP_INITMOD(windows_get_symbol)
DECLARE_CPP_INITMOD(windows_io)
DECLARE_CPP_INITMOD(windows_opencl)
DECLARE_CPP_INITMOD(windows_profiler)
DECLARE_CPP_INITMOD(windows_threads)
DECLARE_CPP_INITMOD(windows_threads_tsan)
DECLARE_CPP_INITMOD(windows_yield)
DECLARE_CPP_INITMOD(write_debug_image)
// Universal LL Initmods. Please keep sorted alphabetically.
// Each line expands to an independent getter, so order is cosmetic only.
DECLARE_LL_INITMOD(posix_math)
DECLARE_LL_INITMOD(ptx_dev)
DECLARE_LL_INITMOD(win32_math)
// Various conditional initmods follow (both LL and CPP).
// Pattern used throughout: when the corresponding WITH_* macro is defined the
// real embedded module is declared; otherwise DECLARE_NO_INITMOD supplies
// getters with the same names that report a user_error when called.
#ifdef WITH_METAL
DECLARE_CPP_INITMOD(metal)
// The Objective-C glue modules additionally depend on the matching CPU backend.
#ifdef WITH_AARCH64
DECLARE_CPP_INITMOD(metal_objc_arm)
#else
DECLARE_NO_INITMOD(metal_objc_arm)
#endif
#ifdef WITH_X86
DECLARE_CPP_INITMOD(metal_objc_x86)
#else
DECLARE_NO_INITMOD(metal_objc_x86)
#endif
#else
DECLARE_NO_INITMOD(metal)
DECLARE_NO_INITMOD(metal_objc_arm)
DECLARE_NO_INITMOD(metal_objc_x86)
#endif // WITH_METAL
#ifdef WITH_ARM
DECLARE_LL_INITMOD(arm)
DECLARE_LL_INITMOD(arm_no_neon)
DECLARE_CPP_INITMOD(arm_cpu_features)
#else
DECLARE_NO_INITMOD(arm)
DECLARE_NO_INITMOD(arm_no_neon)
DECLARE_NO_INITMOD(arm_cpu_features)
#endif // WITH_ARM
#ifdef WITH_AARCH64
DECLARE_LL_INITMOD(aarch64)
DECLARE_CPP_INITMOD(aarch64_cpu_features)
#else
DECLARE_NO_INITMOD(aarch64)
DECLARE_NO_INITMOD(aarch64_cpu_features)
#endif // WITH_AARCH64
// Note: unlike the other backends, no stand-in getters are declared for the
// ptx_compute_* modules when WITH_NVPTX is undefined.
#ifdef WITH_NVPTX
DECLARE_LL_INITMOD(ptx_compute_20)
DECLARE_LL_INITMOD(ptx_compute_30)
DECLARE_LL_INITMOD(ptx_compute_35)
#endif // WITH_NVPTX
// The x86 D3D12 module needs both the D3D12 and the X86 backends.
#if defined(WITH_D3D12) && defined(WITH_X86)
DECLARE_CPP_INITMOD(windows_d3d12compute_x86)
#else
DECLARE_NO_INITMOD(windows_d3d12compute_x86)
#endif
// The ARM D3D12 module is assembled by hand rather than via
// DECLARE_CPP_INITMOD: the 32-bit variants depend on WITH_ARM and the 64-bit
// variants on WITH_AARCH64, each falling back to a stand-in getter, and the
// lookup is then defined over whichever getters were declared. The stand-ins
// accept a lone context argument because their bool parameters are defaulted.
#ifdef WITH_D3D12
#ifdef WITH_ARM
DECLARE_INITMOD(windows_d3d12compute_arm_32)
DECLARE_INITMOD(windows_d3d12compute_arm_32_debug)
#else
DECLARE_NO_INITMOD(windows_d3d12compute_arm_32)
DECLARE_NO_INITMOD(windows_d3d12compute_arm_32_debug)
#endif
#ifdef WITH_AARCH64
DECLARE_INITMOD(windows_d3d12compute_arm_64)
DECLARE_INITMOD(windows_d3d12compute_arm_64_debug)
#else
DECLARE_NO_INITMOD(windows_d3d12compute_arm_64)
DECLARE_NO_INITMOD(windows_d3d12compute_arm_64_debug)
#endif
DECLARE_CPP_INITMOD_LOOKUP(windows_d3d12compute_arm)
#else
DECLARE_NO_INITMOD(windows_d3d12compute_arm)
#endif // WITH_D3D12
#ifdef WITH_X86
DECLARE_LL_INITMOD(x86_avx512)
DECLARE_LL_INITMOD(x86_avx2)
DECLARE_LL_INITMOD(x86_avx)
DECLARE_LL_INITMOD(x86)
DECLARE_LL_INITMOD(x86_sse41)
DECLARE_CPP_INITMOD(x86_cpu_features)
#else
DECLARE_NO_INITMOD(x86_avx512)
DECLARE_NO_INITMOD(x86_avx2)
DECLARE_NO_INITMOD(x86_avx)
DECLARE_NO_INITMOD(x86)
DECLARE_NO_INITMOD(x86_sse41)
DECLARE_NO_INITMOD(x86_cpu_features)
#endif // WITH_X86
#ifdef WITH_MIPS
DECLARE_LL_INITMOD(mips)
DECLARE_CPP_INITMOD(mips_cpu_features)
#else
DECLARE_NO_INITMOD(mips)
DECLARE_NO_INITMOD(mips_cpu_features)
#endif // WITH_MIPS
#ifdef WITH_POWERPC
DECLARE_LL_INITMOD(powerpc)
DECLARE_CPP_INITMOD(powerpc_cpu_features)
#else
DECLARE_NO_INITMOD(powerpc)
DECLARE_NO_INITMOD(powerpc_cpu_features)
#endif // WITH_POWERPC
#ifdef WITH_HEXAGON
DECLARE_LL_INITMOD(hvx_128)
DECLARE_CPP_INITMOD(hexagon_cpu_features)
#else
DECLARE_NO_INITMOD(hvx_128)
DECLARE_NO_INITMOD(hexagon_cpu_features)
#endif // WITH_HEXAGON
#ifdef WITH_WEBASSEMBLY
DECLARE_CPP_INITMOD(wasm_cpu_features)
DECLARE_LL_INITMOD(wasm_math)
#else
DECLARE_NO_INITMOD(wasm_cpu_features)
DECLARE_NO_INITMOD(wasm_math)
#endif // WITH_WEBASSEMBLY
// The riscv LL module declarations are commented out in both branches, so
// only the cpu-features module is declared for RISCV.
#ifdef WITH_RISCV
//DECLARE_LL_INITMOD(riscv)
DECLARE_CPP_INITMOD(riscv_cpu_features)
#else
//DECLARE_NO_INITMOD(riscv)
DECLARE_NO_INITMOD(riscv_cpu_features)
#endif // WITH_RISCV
// Returns the LLVM data layout for a Halide target, selected by
// architecture, then pointer width (anything other than 32 is treated
// as the 64-bit case), then OS where the layout differs per OS.
// An unrecognized architecture is reported as an internal error.
llvm::DataLayout get_data_layout_for_target(Target target) {
    const bool is_32_bit = (target.bits == 32);
    const bool is_apple_os = (target.os == Target::OSX || target.os == Target::IOS);

    if (target.arch == Target::X86) {
        if (is_32_bit) {
            if (is_apple_os) {
                return llvm::DataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:128-n8:16:32-S128");
            }
            if (target.os == Target::Windows) {
                // The JIT and non-JIT Windows layouts differ only in the mangling (m:) field.
                if (!target.has_feature(Target::JIT)) {
                    return llvm::DataLayout("e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32");
                }
                return llvm::DataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32");
            }
            // Linux/Android
            return llvm::DataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128");
        }
        // 64-bit
        if (is_apple_os) {
            return llvm::DataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
        }
        if (target.os == Target::Windows) {
            if (!target.has_feature(Target::JIT)) {
                return llvm::DataLayout("e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
            }
            return llvm::DataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
        }
        return llvm::DataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
    }

    if (target.arch == Target::ARM) {
        if (is_32_bit) {
            // Only iOS gets a dedicated 32-bit layout; every other OS shares one.
            if (target.os == Target::IOS) {
                return llvm::DataLayout("e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32");
            }
            return llvm::DataLayout("e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64");
        }
        // 64-bit
        if (is_apple_os) {
            return llvm::DataLayout("e-m:o-i64:64-i128:128-n32:64-S128");
        }
        if (target.os == Target::Windows) {
            return llvm::DataLayout("e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128");
        }
        return llvm::DataLayout("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128");
    }

    if (target.arch == Target::MIPS) {
        return llvm::DataLayout(is_32_bit ?
                                    "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64" :
                                    "e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128");
    }

    if (target.arch == Target::POWERPC) {
        return llvm::DataLayout(is_32_bit ?
                                    "e-m:e-i32:32-n32" :
                                    "e-m:e-i64:64-n32:64");
    }

    if (target.arch == Target::Hexagon) {
        // A single layout regardless of target.bits.
        return llvm::DataLayout(
            "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8"
            "-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048");
    }

    if (target.arch == Target::WebAssembly) {
        return llvm::DataLayout(is_32_bit ?
                                    "e-m:e-p:32:32-i64:64-n32:64-S128" :
                                    "e-m:e-p:64:64-i64:64-n32:64-S128");
    }

    if (target.arch == Target::RISCV) {
        // TODO: Validate that this data layout is correct for RISCV. The assumption is that it is like MIPS.
        return llvm::DataLayout(is_32_bit ?
                                    "e-m:e-p:32:32-i64:64-n32-S128" :
                                    "e-m:e-p:64:64-i64:64-i128:128-n64-S128");
    }

    internal_error << "Bad target arch: " << target.arch << "\n";
    return llvm::DataLayout("unreachable");
}
} // namespace
namespace Internal {
/** Build the llvm::Triple (arch, vendor, OS, environment and, in a few
 * cases, object format) that corresponds to a Halide Target.
 *
 * Architectures that come in two widths treat target.bits == 32 as the
 * 32-bit variant and otherwise require target.bits == 64, raising a
 * user error for any other value. Unsupported arch/OS combinations
 * raise a user error; an unrecognized arch raises an internal error.
 */
llvm::Triple get_triple_for_target(const Target &target) {
    llvm::Triple triple;

    if (target.arch == Target::X86) {
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::x86);
        } else {
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::x86_64);
        }

        // Any OS not listed here leaves the OS part of the triple unset.
        if (target.os == Target::Linux) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::GNU);
        } else if (target.os == Target::OSX) {
            triple.setVendor(llvm::Triple::Apple);
            triple.setOS(llvm::Triple::MacOSX);
        } else if (target.os == Target::Windows) {
            triple.setVendor(llvm::Triple::PC);
            triple.setOS(llvm::Triple::Win32);
            triple.setEnvironment(llvm::Triple::MSVC);
            if (target.has_feature(Target::JIT)) {
                // Use ELF for jitting
                triple.setObjectFormat(llvm::Triple::ELF);
            }
        } else if (target.os == Target::Android) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::Android);
        } else if (target.os == Target::IOS) {
            // X86 on iOS for the simulator
            triple.setVendor(llvm::Triple::Apple);
            triple.setOS(llvm::Triple::IOS);
        } else if (target.os == Target::Fuchsia) {
            triple.setOS(llvm::Triple::Fuchsia);
        }
    } else if (target.arch == Target::ARM) {
        if (target.bits == 32) {
            if (target.has_feature(Target::ARMv7s)) {
                triple.setArchName("armv7s");
            } else {
                triple.setArch(llvm::Triple::arm);
            }
        } else {
            user_assert(target.bits == 64) << "Target bits must be 32 or 64\n";
#ifdef WITH_AARCH64
            triple.setArch(llvm::Triple::aarch64);
#else
            user_error << "AArch64 llvm target not enabled in this build of Halide\n";
#endif
        }

        if (target.os == Target::Android) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::EABI);
        } else if (target.os == Target::IOS) {
            triple.setOS(llvm::Triple::IOS);
            triple.setVendor(llvm::Triple::Apple);
        } else if (target.os == Target::Linux) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::GNUEABIHF);
        } else if (target.os == Target::Windows) {
            user_assert(target.bits == 64) << "Windows ARM targets must be 64-bit.\n";
            triple.setVendor(llvm::Triple::PC);
            triple.setOS(llvm::Triple::Win32);
            triple.setEnvironment(llvm::Triple::MSVC);
            if (target.has_feature(Target::JIT)) {
                // TODO(shoaibkamil): figure out a way to test this.
                // Currently blocked by https://github.com/halide/Halide/issues/5040
                user_error << "No JIT support for this OS/CPU combination yet.\n";
            }
        } else if (target.os == Target::Fuchsia) {
            triple.setOS(llvm::Triple::Fuchsia);
        } else if (target.os == Target::OSX) {
            triple.setVendor(llvm::Triple::Apple);
            triple.setOS(llvm::Triple::MacOSX);
            // Overrides the arch name chosen above with "arm64".
            triple.setArchName("arm64");
        } else if (target.os == Target::NoOS) {
            // For bare-metal environments
        } else {
            user_error << "No arm support for this OS\n";
        }
    } else if (target.arch == Target::MIPS) {
        // Currently MIPS support is only little-endian.
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::mipsel);
        } else {
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::mips64el);
        }

        if (target.os == Target::Android) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::Android);
        } else {
            user_error << "No mips support for this OS\n";
        }
    } else if (target.arch == Target::POWERPC) {
#ifdef WITH_POWERPC
        // Only ppc*-unknown-linux-gnu are supported for the time being.
        user_assert(target.os == Target::Linux) << "PowerPC target is Linux-only.\n";
        triple.setVendor(llvm::Triple::UnknownVendor);
        triple.setOS(llvm::Triple::Linux);
        triple.setEnvironment(llvm::Triple::GNU);
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::ppc);
        } else {
            // Currently POWERPC64 support is only little-endian.
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::ppc64le);
        }
#else
        user_error << "PowerPC llvm target not enabled in this build of Halide\n";
#endif
    } else if (target.arch == Target::Hexagon) {
        triple.setVendor(llvm::Triple::UnknownVendor);
        triple.setArch(llvm::Triple::hexagon);
        triple.setObjectFormat(llvm::Triple::ELF);
    } else if (target.arch == Target::WebAssembly) {
        triple.setVendor(llvm::Triple::UnknownVendor);
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::wasm32);
        } else {
            // Match the other two-width architectures: reject a malformed
            // bit width instead of silently treating it as wasm64.
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::wasm64);
        }
        triple.setObjectFormat(llvm::Triple::Wasm);
    } else if (target.arch == Target::RISCV) {
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::riscv32);
        } else {
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::riscv64);
        }

        if (target.os == Target::Linux) {
            triple.setOS(llvm::Triple::Linux);
            // TODO: Check what options there are here.
            triple.setEnvironment(llvm::Triple::GNUEABIHF);
        } else if (target.os == Target::NoOS) {
            // for baremetal environment
        } else {
            user_error << "No RISCV support for this OS\n";
        }
    } else {
        internal_error << "Bad target arch: " << target.arch << "\n";
    }

    return triple;
}
} // namespace Internal
namespace {
// Downgrades a weak global to its linkonce counterpart (weak-any ->
// linkonce-any, weak-ODR -> linkonce-ODR) and turns external-weak into
// plain external. Any other linkage is left untouched.
void convert_weak_to_linkonce(llvm::GlobalValue &gv) {
    switch (gv.getLinkage()) {
    case llvm::GlobalValue::WeakAnyLinkage:
        gv.setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
        break;
    case llvm::GlobalValue::WeakODRLinkage:
        gv.setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
        break;
    case llvm::GlobalValue::ExternalWeakLinkage:
        gv.setLinkage(llvm::GlobalValue::ExternalLinkage);
        break;
    default:
        break;
    }
}
// Links every module in `modules` into modules[0]; the other entries are
// consumed by the link. The data layout and target triple for `t` are
// applied to each module first. Afterwards weak symbols are relaxed to
// linkonce where allowed, comdats are adjusted per OS, and the helper
// globals that only existed to keep symbols alive are removed.
// When `allow_stripping_all_weak_functions` is true, even "halide_" and
// retained functions are converted to linkonce.
void link_modules(std::vector<std::unique_ptr<llvm::Module>> &modules, Target t,
                  bool allow_stripping_all_weak_functions = false) {
    const llvm::DataLayout data_layout = get_data_layout_for_target(t);
    const llvm::Triple triple = Internal::get_triple_for_target(t);
    const bool targeting_windows = (t.os == Target::Windows);

    // Set the layout and triple on the modules before linking, so
    // llvm doesn't complain while combining them.
    for (auto &mod : modules) {
        if (targeting_windows &&
            !Internal::starts_with(mod->getName().str(), "windows_")) {
            // When compiling for windows, all wchars are
            // 16-bit. Generic modules may have it set to 32-bit. Drop
            // any module flags on the generic modules and use the
            // more correct ones on the windows-specific modules to
            // avoid a conflict. This is safe as long as the generic
            // modules never actually use a wchar.
            if (auto *flags_md = mod->getModuleFlagsMetadata()) {
                mod->eraseNamedMetadata(flags_md);
            }
        }
        mod->setDataLayout(data_layout);
        mod->setTargetTriple(triple.str());
    }

    // Fold modules[1..] into modules[0], one at a time.
    for (size_t i = 1; i < modules.size(); i++) {
        const bool link_failed = llvm::Linker::linkModules(*modules[0],
                                                           std::move(modules[i]));
        if (link_failed) {
            internal_error << "Failure linking initial modules\n";
        }
    }

    llvm::Module &linked = *modules[0];

    // Now re-mark most weak symbols as linkonce. They are only weak to
    // prevent llvm from stripping them during initial module
    // assembly. This means they can be stripped later.

    // The symbols that we might want to call as a user even if not
    // used in the Halide-generated code must remain weak. This is
    // handled automatically by assuming any symbol starting with
    // "halide_" that is weak will be retained. There are a few
    // symbols for which this convention is not followed and these are
    // in this set.
    const std::set<string> retain = {"__stack_chk_guard",
                                     "__stack_chk_fail"};

    // COMDAT is not supported in MachO object files, hence it does
    // not work on Mac OS or iOS. These sometimes show up in the
    // runtime since we compile for an abstract target that is based
    // on ELF. This code removes all Comdat items and leaves the
    // symbols they were attached to as regular definitions, which
    // only works if there is a single instance, which is generally
    // the case for the runtime. Presumably if this isn't true,
    // linking the module will fail.
    //
    // Comdats are left in for other platforms as they are required
    // for certain things on Windows and they are useful in general in
    // ELF based formats.
    if (t.os == Target::IOS || t.os == Target::OSX) {
        for (auto &global_obj : linked.global_objects()) {
            global_obj.setComdat(nullptr);
        }
        linked.getComdatSymbolTable().clear();
    }

    // Global variables: none are part of the public interface (even
    // the ones labelled halide_), so all of them may be relaxed.
    for (auto &global_var : linked.globals()) {
        convert_weak_to_linkonce(global_var);
    }

    // Functions.
    for (auto &func : linked) {
        const std::string func_name = Internal::get_llvm_function_name(func);
        const bool is_halide_extern_c_sym = Internal::starts_with(func_name, "halide_");
        internal_assert(!is_halide_extern_c_sym || func.isWeakForLinker() || func.isDeclaration())
            << " for function " << func_name << "\n";

        if (func.getLinkage() == llvm::GlobalValue::ExternalWeakLinkage) {
            // We never want *any* Function marked as external-weak here;
            // convert all of those to plain external.
            func.setLinkage(llvm::GlobalValue::ExternalLinkage);
        } else {
            const bool can_strip = !is_halide_extern_c_sym && retain.count(func_name) == 0;
            if (can_strip || allow_stripping_all_weak_functions) {
                convert_weak_to_linkonce(func);
            }
        }

        // Windows requires every symbol that's going to get merged
        // has a comdat that specifies how. The linkage type alone
        // isn't enough.
        if (targeting_windows && func.isWeakForLinker()) {
            llvm::Comdat *any_comdat = linked.getOrInsertComdat(func_name);
            any_comdat->setSelectionKind(llvm::Comdat::Any);
            func.setComdat(any_comdat);
        }
    }

    // Removes the named global from the linked module if it exists.
    const auto erase_global_if_present = [&linked](const char *name) {
        if (llvm::GlobalValue *found = linked.getNamedGlobal(name)) {
            found->eraseFromParent();
        }
    };

    // Now remove the force-usage globals that prevented clang from
    // dropping functions from the initial module.
    erase_global_if_present("llvm.used");
    erase_global_if_present("llvm.compiler.used");

    // Also drop the dummy runtime api usage. We only needed it so
    // that the declarations are retained in the module during the
    // linking procedure above.
    erase_global_if_present("halide_runtime_api_functions");
}
} // namespace
namespace Internal {
/** When JIT-compiling on 32-bit windows, calls to name-mangled win32
 * api functions must be redirected to the non-name-mangled versions.
 * For each body-less __stdcall function named "\01_<name>@<n>", this
 * declares "<name>" and gives the mangled function a body that simply
 * forwards its arguments to it.
 */
void undo_win32_name_mangling(llvm::Module *m) {
    llvm::IRBuilder<> builder(m->getContext());
    // Walk every function in the module. Declarations created below are
    // appended to the module while it is being walked.
    for (auto it = m->begin(); it != m->end(); ++it) {
        llvm::Function &mangled = *it;
        const string mangled_name = get_llvm_function_name(mangled);

        // Only __stdcall prototypes whose name starts with \01_ are win32 api calls.
        const bool is_win32_api_decl =
            mangled.getCallingConv() == llvm::CallingConv::X86_StdCall &&
            mangled.empty() &&
            mangled_name.size() > 2 && mangled_name[0] == 1 && mangled_name[1] == '_';
        if (!is_win32_api_decl) {
            continue;
        }

        // Strip the "\01_" prefix, then everything from the last '@' on
        // (the whole remainder is kept if there is no '@').
        string plain_name = mangled_name.substr(2);
        plain_name = plain_name.substr(0, plain_name.rfind('@'));

        // Extern declare the unmangled version.
        llvm::Function *plain_fn = llvm::Function::Create(mangled.getFunctionType(), mangled.getLinkage(), plain_name, m);
        plain_fn->setCallingConv(mangled.getCallingConv());

        // Add a body to the mangled version that calls the unmangled version.
        llvm::BasicBlock *entry_block = llvm::BasicBlock::Create(m->getContext(), "entry", &mangled);
        builder.SetInsertPoint(entry_block);

        vector<llvm::Value *> forwarded_args;
        for (auto &param : mangled.args()) {
            forwarded_args.push_back(&param);
        }

        llvm::CallInst *forward_call = builder.CreateCall(plain_fn, forwarded_args);
        forward_call->setCallingConv(mangled.getCallingConv());

        if (mangled.getReturnType()->isVoidTy()) {
            builder.CreateRetVoid();
        } else {
            builder.CreateRet(forward_call);
        }
    }
}
// Retargets `call` from `fn` to the function named "_" + fn's name,
// declaring that function in `m` (external linkage, same type as `fn`)
// if the module does not already contain it.
void add_underscore_to_posix_call(llvm::CallInst *call, llvm::Function *fn, llvm::Module *m) {
    const string underscored_name = "_" + fn->getName().str();

    llvm::Function *replacement = m->getFunction(underscored_name);
    if (replacement == nullptr) {
        replacement = llvm::Function::Create(fn->getFunctionType(),
                                             llvm::GlobalValue::ExternalLinkage,
                                             underscored_name, m);
    }

    // Checks that the function ended up with exactly the requested name.
    internal_assert(replacement->getName() == underscored_name);
    call->setCalledFunction(replacement);
}
/** Windows uses _close, _open, _write, etc instead of the posix
 * names. Defining stubs that redirect causes mis-compilations inside
 * of mcjit, so instead every direct call to one of the listed posix
 * functions is rewritten to call the underscore-prefixed name. */
void add_underscores_to_posix_calls_on_windows(llvm::Module *m) {
    string posix_fns[] = {"vsnprintf", "open", "close", "write", "fileno"};

    for (auto &fn : *m) {
        for (auto &basic_block : fn) {
            for (auto &instruction : basic_block) {
                llvm::CallInst *call = llvm::dyn_cast<llvm::CallInst>(&instruction);
                if (call == nullptr) {
                    continue;
                }
                // Skip calls for which no called function is reported.
                llvm::Function *callee = call->getCalledFunction();
                if (callee == nullptr) {
                    continue;
                }
                const bool is_posix_fn =
                    std::find(std::begin(posix_fns), std::end(posix_fns), callee->getName()) != std::end(posix_fns);
                if (is_posix_fn) {
                    add_underscore_to_posix_call(call, callee, m);
                }
            }
        }
    }
}
// Links `extra_module` together with the runtime modules listed below and
// returns the combined module. `extra_module` is placed first, so it is
// the module everything else is linked into.
std::unique_ptr<llvm::Module> link_with_wasm_jit_runtime(llvm::LLVMContext *c, const Target &t,
                                                         std::unique_ptr<llvm::Module> extra_module) {
    const bool is_64_bit = (t.bits == 64);
    const bool with_debug = t.has_feature(Target::Debug);

    vector<std::unique_ptr<llvm::Module>> pieces;
    const auto add = [&pieces](std::unique_ptr<llvm::Module> mod) {
        pieces.push_back(std::move(mod));
    };

    // We only need to include things that must be linked in as callable entrypoints;
    // things that are 'alwaysinline' can be included here but are unnecessary.
    add(std::move(extra_module));
    add(get_initmod_fake_thread_pool(c, is_64_bit, with_debug));
    add(get_initmod_posix_allocator(c, is_64_bit, with_debug));
    add(get_initmod_halide_buffer_t(c, is_64_bit, with_debug));
    add(get_initmod_destructors(c, is_64_bit, with_debug));
    // These two aren't necessary, since they are 100% alwaysinline
    // add(get_initmod_posix_math_ll(c));
    // add(get_initmod_wasm_math_ll(c));
    add(get_initmod_tracing(c, is_64_bit, with_debug));
    add(get_initmod_cache(c, is_64_bit, with_debug));
    add(get_initmod_to_string(c, is_64_bit, with_debug));
    add(get_initmod_alignment_32(c, is_64_bit, with_debug));
    add(get_initmod_device_interface(c, is_64_bit, with_debug));
    add(get_initmod_metadata(c, is_64_bit, with_debug));
    add(get_initmod_float16_t(c, is_64_bit, with_debug));
    add(get_initmod_errors(c, is_64_bit, with_debug));
    add(get_initmod_msan_stubs(c, is_64_bit, with_debug));

    // We don't want anything marked as weak for the wasm-jit runtime,
    // so convert all of them to linkonce
    link_modules(pieces, t, /*allow_stripping_all_weak_functions=*/true);

    return std::move(pieces[0]);
}
/** Create an llvm module containing the support code for a given target. */
std::unique_ptr<llvm::Module> get_initial_module_for_target(Target t, llvm::LLVMContext *c, bool for_shared_jit_runtime, bool just_gpu) {
enum InitialModuleType {
ModuleAOT,
ModuleAOTNoRuntime,
ModuleJITShared,
ModuleJITInlined,
ModuleGPU
} module_type;
if (t.has_feature(Target::JIT)) {
if (just_gpu) {
module_type = ModuleGPU;
} else if (for_shared_jit_runtime) {
module_type = ModuleJITShared;
} else {
module_type = ModuleJITInlined;
}
} else if (t.has_feature(Target::NoRuntime)) {
module_type = ModuleAOTNoRuntime;
} else {
module_type = ModuleAOT;
}
// Halide::Internal::debug(0) << "Getting initial module type " << (int)module_type << "\n";
internal_assert(t.bits == 32 || t.bits == 64)
<< "Bad target: " << t.to_string();
bool bits_64 = (t.bits == 64);
bool debug = t.has_feature(Target::Debug);
bool tsan = t.has_feature(Target::TSAN);
vector<std::unique_ptr<llvm::Module>> modules;
if (module_type != ModuleGPU) {
if (module_type != ModuleJITInlined && module_type != ModuleAOTNoRuntime) {
// OS-dependent modules
if (t.os == Target::Linux) {
modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
modules.push_back(get_initmod_posix_print(c, bits_64, debug));
if (t.arch == Target::X86) {
modules.push_back(get_initmod_linux_clock(c, bits_64, debug));
} else {
modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
}
modules.push_back(get_initmod_posix_io(c, bits_64, debug));
modules.push_back(get_initmod_linux_host_cpu_count(c, bits_64, debug));
modules.push_back(get_initmod_linux_yield(c, bits_64, debug));
if (tsan) {
modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
} else {
modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
}
modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
} else if (t.os == Target::WebAssemblyRuntime) {
modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
modules.push_back(get_initmod_posix_print(c, bits_64, debug));
modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
modules.push_back(get_initmod_posix_io(c, bits_64, debug));
modules.push_back(get_initmod_linux_host_cpu_count(c, bits_64, debug));
modules.push_back(get_initmod_linux_yield(c, bits_64, debug));
if (t.has_feature(Target::WasmThreads)) {
// Assume that the wasm libc will be providing pthreads
modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
} else {
modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
}
modules.push_back(get_initmod_fake_get_symbol(c, bits_64, debug));
} else if (t.os == Target::OSX) {
modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
modules.push_back(get_initmod_posix_print(c, bits_64, debug));
modules.push_back(get_initmod_osx_clock(c, bits_64, debug));
modules.push_back(get_initmod_posix_io(c, bits_64, debug));
modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
modules.push_back(get_initmod_osx_yield(c, bits_64, debug));
if (tsan) {
modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
} else {
modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
}
modules.push_back(get_initmod_osx_get_symbol(c, bits_64, debug));
modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
} else if (t.os == Target::Android) {
modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
modules.push_back(get_initmod_posix_print(c, bits_64, debug));
if (t.arch == Target::ARM) {
modules.push_back(get_initmod_android_clock(c, bits_64, debug));
} else {
modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
}
modules.push_back(get_initmod_android_io(c, bits_64, debug));
modules.push_back(get_initmod_android_host_cpu_count(c, bits_64, debug));
modules.push_back(get_initmod_linux_yield(c, bits_64, debug)); // TODO: verify
if (tsan) {
modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
} else {
modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
}
modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
} else if (t.os == Target::Windows) {
modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
modules.push_back(get_initmod_posix_print(c, bits_64, debug));
modules.push_back(get_initmod_windows_clock(c, bits_64, debug));
modules.push_back(get_initmod_windows_io(c, bits_64, debug));
modules.push_back(get_initmod_windows_yield(c, bits_64, debug));
if (tsan) {
modules.push_back(get_initmod_windows_threads_tsan(c, bits_64, debug));
} else {
modules.push_back(get_initmod_windows_threads(c, bits_64, debug));
}
modules.push_back(get_initmod_windows_get_symbol(c, bits_64, debug));
} else if (t.os == Target::IOS) {
modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
modules.push_back(get_initmod_posix_print(c, bits_64, debug));
modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
modules.push_back(get_initmod_ios_io(c, bits_64, debug));
modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
modules.push_back(get_initmod_osx_yield(c, bits_64, debug));
if (tsan) {
modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
} else {
modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
}
} else if (t.os == Target::QuRT) {
modules.push_back(get_initmod_qurt_allocator(c, bits_64, debug));
modules.push_back(get_initmod_qurt_yield(c, bits_64, debug));
if (tsan) {
modules.push_back(get_initmod_qurt_threads_tsan(c, bits_64, debug));
} else {
modules.push_back(get_initmod_qurt_threads(c, bits_64, debug));
}
modules.push_back(get_initmod_qurt_init_fini(c, bits_64, debug));
} else if (t.os == Target::NoOS) {
// The OS-specific symbols provided by the modules
// above are expected to be provided by the containing
// process instead at link time. Less aggressive than
// NoRuntime, as OS-agnostic modules like tracing are
// still included below.
if (t.arch == Target::Hexagon) {
modules.push_back(get_initmod_qurt_allocator(c, bits_64, debug));
}
modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
} else if (t.os == Target::Fuchsia) {
modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
modules.push_back(get_initmod_posix_print(c, bits_64, debug));
modules.push_back(get_initmod_fuchsia_clock(c, bits_64, debug));
modules.push_back(get_initmod_posix_io(c, bits_64, debug));
modules.push_back(get_initmod_fuchsia_host_cpu_count(c, bits_64, debug));
modules.push_back(get_initmod_fuchsia_yield(c, bits_64, debug));
if (tsan) {
modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
} else {
modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
}
modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
}
}
if (module_type != ModuleJITShared) {
// The first module for inline only case has to be C/C++ compiled otherwise the
// datalayout is not properly setup.
modules.push_back(get_initmod_halide_buffer_t(c, bits_64, debug));
modules.push_back(get_initmod_destructors(c, bits_64, debug));
modules.push_back(get_initmod_pseudostack(c, bits_64, debug));
// Math intrinsics vary slightly across platforms
if (t.os == Target::Windows) {
if (t.bits == 32) {
modules.push_back(get_initmod_win32_math_ll(c));
} else {
modules.push_back(get_initmod_posix_math_ll(c));
}
} else {
modules.push_back(get_initmod_posix_math_ll(c));
}
}
if (module_type != ModuleJITInlined && module_type != ModuleAOTNoRuntime) {
// These modules are always used and shared
modules.push_back(get_initmod_gpu_device_selection(c, bits_64, debug));
if (t.arch != Target::Hexagon) {
// These modules don't behave correctly on a real
// Hexagon device (they do work in the simulator
// though...).
modules.push_back(get_initmod_tracing(c, bits_64, debug));
modules.push_back(get_initmod_trace_helper(c, bits_64, debug));
modules.push_back(get_initmod_write_debug_image(c, bits_64, debug));
// TODO: Support this module in the Hexagon backend,
// currently generates assert at src/HexagonOffload.cpp:279
modules.push_back(get_initmod_cache(c, bits_64, debug));
}
modules.push_back(get_initmod_to_string(c, bits_64, debug));
if (t.arch == Target::Hexagon ||
t.has_feature(Target::HVX)) {
modules.push_back(get_initmod_alignment_128(c, bits_64, debug));
} else if (t.arch == Target::X86) {
// AVX-512 requires 64-byte alignment. Could only increase alignment
// if AVX-512 is in the target, but that falls afoul of linking
// multiple versions of a filter for different levels of x86 -- weak
// linking will pick one of the alignment modules unpredictably.
// Another way to go is to query the CPU features and align by
// 64 only if the processor has AVX-512.
// The choice to go 64 all the time is for simplicity and on the idea
// that it won't be a noticeable cost in the majority of x86 usage.
modules.push_back(get_initmod_alignment_64(c, bits_64, debug));
} else {
modules.push_back(get_initmod_alignment_32(c, bits_64, debug));
}
modules.push_back(get_initmod_allocation_cache(c, bits_64, debug));
modules.push_back(get_initmod_device_interface(c, bits_64, debug));
modules.push_back(get_initmod_metadata(c, bits_64, debug));
modules.push_back(get_initmod_float16_t(c, bits_64, debug));
modules.push_back(get_initmod_errors(c, bits_64, debug));
// Some environments don't support the atomics the profiler requires.
if (t.arch != Target::MIPS && t.os != Target::NoOS && t.os != Target::QuRT) {
if (t.os == Target::Windows) {
modules.push_back(get_initmod_windows_profiler(c, bits_64, debug));
} else {
modules.push_back(get_initmod_profiler(c, bits_64, debug));
}
}
if (t.has_feature(Target::MSAN)) {
modules.push_back(get_initmod_msan(c, bits_64, debug));
} else {
modules.push_back(get_initmod_msan_stubs(c, bits_64, debug));
}
}
if (module_type != ModuleJITShared) {
// These modules are optional
if (t.arch == Target::X86) {
modules.push_back(get_initmod_x86_ll(c));
}
if (t.arch == Target::ARM) {
if (t.bits == 64) {
modules.push_back(get_initmod_aarch64_ll(c));
} else if (t.has_feature(Target::ARMv7s)) {
modules.push_back(get_initmod_arm_ll(c));
} else if (!t.has_feature(Target::NoNEON)) {
modules.push_back(get_initmod_arm_ll(c));
} else {
modules.push_back(get_initmod_arm_no_neon_ll(c));
}
}
if (t.arch == Target::MIPS) {
modules.push_back(get_initmod_mips_ll(c));
}
if (t.arch == Target::POWERPC) {
modules.push_back(get_initmod_powerpc_ll(c));
}
if (t.arch == Target::Hexagon) {
modules.push_back(get_initmod_qurt_hvx(c, bits_64, debug));
modules.push_back(get_initmod_hvx_128_ll(c));
if (t.features_any_of({Target::HVX_v65, Target::HVX_v66})) {
modules.push_back(get_initmod_qurt_hvx_vtcm(c, bits_64,
debug));
}
} else {
modules.push_back(get_initmod_prefetch(c, bits_64, debug));
}
if (t.has_feature(Target::SSE41)) {
modules.push_back(get_initmod_x86_sse41_ll(c));
}
if (t.has_feature(Target::AVX)) {
modules.push_back(get_initmod_x86_avx_ll(c));
}
if (t.has_feature(Target::AVX2)) {
modules.push_back(get_initmod_x86_avx2_ll(c));
}
if (t.has_feature(Target::AVX512)) {
modules.push_back(get_initmod_x86_avx512_ll(c));
}
if (t.has_feature(Target::Profile)) {
user_assert(t.os != Target::WebAssemblyRuntime) << "The profiler cannot be used in a threadless environment.";
modules.push_back(get_initmod_profiler_inlined(c, bits_64, debug));
}
if (t.arch == Target::WebAssembly) {
modules.push_back(get_initmod_wasm_math_ll(c));
}
}
if (module_type == ModuleAOT) {
// These modules are only used for AOT compilation
modules.push_back(get_initmod_can_use_target(c, bits_64, debug));
if (t.arch == Target::X86) {
modules.push_back(get_initmod_x86_cpu_features(c, bits_64, debug));
}
if (t.arch == Target::ARM) {
if (t.bits == 64) {
modules.push_back(get_initmod_aarch64_cpu_features(c, bits_64, debug));
} else {
modules.push_back(get_initmod_arm_cpu_features(c, bits_64, debug));
}
}
if (t.arch == Target::MIPS) {
modules.push_back(get_initmod_mips_cpu_features(c, bits_64, debug));
}
if (t.arch == Target::POWERPC) {
modules.push_back(get_initmod_powerpc_cpu_features(c, bits_64, debug));
}
if (t.arch == Target::Hexagon) {
modules.push_back(get_initmod_hexagon_cpu_features(c, bits_64, debug));
}
if (t.arch == Target::RISCV) {
modules.push_back(get_initmod_riscv_cpu_features(c, bits_64, debug));
}
if (t.arch == Target::WebAssembly) {
modules.push_back(get_initmod_wasm_cpu_features(c, bits_64, debug));
}
}
}
if (module_type == ModuleJITShared || module_type == ModuleGPU) {
modules.push_back(get_initmod_module_jit_ref_count(c, bits_64, debug));
} else if (module_type == ModuleAOT) {
modules.push_back(get_initmod_module_aot_ref_count(c, bits_64, debug));
}
if (module_type == ModuleAOT || module_type == ModuleGPU) {
if (t.has_feature(Target::CUDA)) {
if (t.os == Target::Windows) {
modules.push_back(get_initmod_windows_cuda(c, bits_64, debug));
} else {
modules.push_back(get_initmod_cuda(c, bits_64, debug));
}
}
if (t.has_feature(Target::OpenCL)) {
if (t.os == Target::Windows) {
modules.push_back(get_initmod_windows_opencl(c, bits_64, debug));
} else {
modules.push_back(get_initmod_opencl(c, bits_64, debug));
}
}
if (t.has_feature(Target::OpenGLCompute)) {
modules.push_back(get_initmod_openglcompute(c, bits_64, debug));
if (t.os == Target::Android) {
// Only platform that supports OpenGL Compute for now.
modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
} else if (t.os == Target::Linux) {
if (t.has_feature(Target::EGL)) {
modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
} else {
modules.push_back(get_initmod_opengl_glx_context(c, bits_64, debug));
}
} else if (t.os == Target::OSX) {
modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug));
} else {
// You're on your own to provide definitions of halide_opengl_get_proc_address and halide_opengl_create_context
}
}
if (t.has_feature(Target::Metal)) {
modules.push_back(get_initmod_metal(c, bits_64, debug));
if (t.arch == Target::ARM) {
modules.push_back(get_initmod_metal_objc_arm(c, bits_64, debug));
} else if (t.arch == Target::X86) {
modules.push_back(get_initmod_metal_objc_x86(c, bits_64, debug));
} else {
user_error << "Metal can only be used on ARM or X86 architectures.\n";
}
}
if (t.has_feature(Target::D3D12Compute)) {
user_assert(bits_64) << "D3D12Compute target only available on 64-bit targets for now.\n";
user_assert(t.os == Target::Windows) << "D3D12Compute target only available on Windows targets.\n";
if (t.arch == Target::X86) {
modules.push_back(get_initmod_windows_d3d12compute_x86(c, bits_64, debug));
} else if (t.arch == Target::ARM) {
modules.push_back(get_initmod_windows_d3d12compute_arm(c, bits_64, debug));
} else {
user_error << "Direct3D 12 can only be used on ARM or X86 architectures.\n";
}
}
if (t.arch != Target::Hexagon && t.has_feature(Target::HVX)) {
modules.push_back(get_initmod_module_jit_ref_count(c, bits_64, debug));
modules.push_back(get_initmod_hexagon_host(c, bits_64, debug));
}
if (t.has_feature(Target::HexagonDma)) {
modules.push_back(get_initmod_hexagon_cache_allocator(c, bits_64, debug));
modules.push_back(get_initmod_hexagon_dma(c, bits_64, debug));
modules.push_back(get_initmod_hexagon_dma_pool(c, bits_64, debug));
}
}
if (module_type == ModuleAOT && t.has_feature(Target::Matlab)) {
modules.push_back(get_initmod_matlab(c, bits_64, debug));
}
if (module_type == ModuleAOTNoRuntime ||
module_type == ModuleJITInlined ||
t.os == Target::NoOS) {
modules.push_back(get_initmod_runtime_api(c, bits_64, debug));
}
link_modules(modules, t);
if (t.os == Target::Windows &&
t.bits == 32 &&
(t.has_feature(Target::JIT))) {
undo_win32_name_mangling(modules[0].get());
}
if (t.os == Target::Windows) {
add_underscores_to_posix_calls_on_windows(modules[0].get());
}
return std::move(modules[0]);
}
#ifdef WITH_NVPTX
/** Build the starting module for PTX device code: the ptx_dev initmod linked
 * together with the libdevice initmod chosen from the CUDA capability
 * features of `target`. The result is given the nvptx64 triple and data
 * layout before being handed back to the caller. */
std::unique_ptr<llvm::Module> get_initial_module_for_ptx_device(Target target, llvm::LLVMContext *c) {
    std::vector<std::unique_ptr<llvm::Module>> modules;
    modules.push_back(get_initmod_ptx_dev_ll(c));

    // Selection follows the table at:
    // http://docs.nvidia.com/cuda/libdevice-users-guide/basic-usage.html#linking-with-libdevice
    auto load_libdevice = [&]() -> std::unique_ptr<llvm::Module> {
        if (target.has_feature(Target::CUDACapability35)) {
            return get_initmod_ptx_compute_35_ll(c);
        }
        // For some reason sm_32 and sm_50 use libdevice 20, and that rule
        // takes precedence over the capability 3.0 check.
        const bool wants_libdevice_20 =
            target.features_any_of({Target::CUDACapability32,
                                    Target::CUDACapability50});
        if (!wants_libdevice_20 && target.has_feature(Target::CUDACapability30)) {
            return get_initmod_ptx_compute_30_ll(c);
        }
        return get_initmod_ptx_compute_20_ll(c);
    };
    modules.push_back(load_libdevice());

    link_modules(modules, target);

    // The PTX backend does not handle calling functions for now, so every
    // function definition is marked AvailableExternally to ensure it is
    // either inlined or deleted, and does not survive into the final code.
    for (llvm::Function &fn : *modules[0]) {
        // Extern declarations have no body to mark. Routines that libdevice
        // tags "noinline" would otherwise have to be changed to allow
        // inlining; they are instead left untouched so that they stay
        // out-of-line and callable in the generated code.
        if (fn.isDeclaration() || fn.hasFnAttribute(llvm::Attribute::NoInline)) {
            continue;
        }
        fn.setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
    }

    const llvm::Triple ptx_triple("nvptx64--");
    modules[0]->setTargetTriple(ptx_triple.str());

    const llvm::DataLayout ptx_layout("e-i64:64-v16:16-v32:32-n16:32:64");
    modules[0]->setDataLayout(ptx_layout);

    return std::move(modules[0]);
}
#endif
/** Parse `bitcode` as an LLVM bitcode module and link it into `module`.
 *
 * \param context  LLVM context used to parse the bitcode.
 * \param module   Destination module; the parsed module is linked into it.
 * \param bitcode  Raw bitcode bytes. An empty buffer is passed on to the
 *                 parser as a zero-length buffer rather than being
 *                 dereferenced here.
 * \param name     Identifier passed to the parser for the incoming module
 *                 and used in the link-failure message.
 *
 * Reports an internal_error if the link step fails. */
void add_bitcode_to_module(llvm::LLVMContext *context, llvm::Module &module,
                           const std::vector<uint8_t> &bitcode, const std::string &name) {
    // Use data() rather than &bitcode[0]: indexing element 0 of an empty
    // vector is undefined behavior, while data() is always valid to call.
    const llvm::StringRef sb(reinterpret_cast<const char *>(bitcode.data()), bitcode.size());
    std::unique_ptr<llvm::Module> add_in = parse_bitcode_file(sb, context, name.c_str());
    // Linker::linkModules returns true on error.
    const bool failed = llvm::Linker::linkModules(module, std::move(add_in));
    if (failed) {
        internal_error << "Failure linking in additional module: " << name << "\n";
    }
}
} // namespace Internal
} // namespace Halide