#include "LLVM_Output.h" #include "CodeGen_C.h" #include "CodeGen_Internal.h" #include "CodeGen_LLVM.h" #include "CompilerLogger.h" #include "LLVM_Headers.h" #include "LLVM_Runtime_Linker.h" #include #include #ifdef _WIN32 #ifndef NOMINMAX #define NOMINMAX #endif #include #else #include #include #include #endif namespace Halide { namespace Internal { namespace Archive { namespace { // This is a bare-bones Windows .lib file writer, based on inspection // of the LLVM ArchiveWriter class and the documentation at // https://www.microsoft.com/msj/0498/hood0498.aspx and // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680547(v=vs.85).aspx#archive__library__file_format // // It has been compared with the output of VS2015's lib.exe and appears to be // bit-identical (to meaningful bits, anyway) for a sampling of Halide // AOT output, but it is quite possible that there are omissions, mistakes, // or just plain bugs. // Emit a field that is 'size' characters wide. // If data too small, pad on the right with spaces. // If data too large, assert. // Return the offset at which 'data' was written. template size_t emit_padded(std::ostream &out, T data, size_t size) { size_t pos = out.tellp(); out << data; size_t written = (size_t)out.tellp() - pos; internal_assert(written <= size); while (written < size) { out.put(' '); written++; } return pos; } using EmitU32 = std::function; void emit_big_endian_u32(std::ostream &out, uint32_t value) { out << static_cast((value >> 24) & 0xff) << static_cast((value >> 16) & 0xff) << static_cast((value >> 8) & 0xff) << static_cast((value)&0xff); } void emit_little_endian_u32(std::ostream &out, uint32_t value) { out << static_cast((value)&0xff) << static_cast((value >> 8) & 0xff) << static_cast((value >> 16) & 0xff) << static_cast((value >> 24) & 0xff); } void emit_little_endian_u16(std::ostream &out, uint16_t value) { out << static_cast((value)&0xff) << static_cast((value >> 8) & 0xff); } // Return the offset at which 'size' was written size_t finish_member_header(std::ostream &out, size_t size) { // Emit zero for all of these, to mimic the 'deterministic' flag emit_padded(out, 0, 12); // timestamp emit_padded(out, ' ', 6); // UID emit_padded(out, ' ', 6); // GID emit_padded(out, 0, 8); // perm const size_t pos = emit_padded(out, size, 10); // total size of the archive member (not including header) out << "\x60\x0A"; return pos; } std::string member_name(const llvm::NewArchiveMember &m) { return m.MemberName.str(); } std::map write_string_table(std::ostream &out, const std::vector &members) { std::map string_to_offset_map; size_t start_offset = 0; for (const llvm::NewArchiveMember &m : members) { std::string name = member_name(m); internal_assert(string_to_offset_map.count(name) == 0); if (name.size() < 16 && name.find('/') == std::string::npos) { // small strings that don't contain '/' can be inlined continue; } if (start_offset == 0) { emit_padded(out, "//", 16); finish_member_header(out, 0); start_offset = out.tellp(); } string_to_offset_map[name] = (size_t)out.tellp() - start_offset; out << name; out.put('\0'); } // If all strings are short enough, we skip the string table entirely if (start_offset != 0) { size_t member_end = out.tellp(); if (out.tellp() % 2) { out.put('\x0A'); } size_t final_offset = out.tellp(); out.seekp(start_offset - 12); emit_padded(out, member_end - start_offset, 10); out.seekp(final_offset); } return string_to_offset_map; } struct PatchInfo { EmitU32 emit_u32; size_t pos; }; void write_symbol_table(std::ostream &out, const std::vector &members, bool windows_coff_format, std::map> *patchers) { internal_assert(!members.empty()); EmitU32 emit_u32 = windows_coff_format ? emit_little_endian_u32 : emit_big_endian_u32; // Write zero for sizes/offsets that will be patched later. const size_t kPatchLater = 0; std::map name_to_member_index; const auto kFileMagicUnknown = llvm::file_magic::unknown; llvm::LLVMContext context; for (size_t i = 0, n = members.size(); i < n; ++i) { llvm::MemoryBufferRef member_buffer = members[i].Buf->getMemBufferRef(); llvm::Expected> obj_or_err = llvm::object::SymbolicFile::createSymbolicFile( member_buffer, kFileMagicUnknown, &context); if (!obj_or_err) { // Don't use internal_assert: the call to new_member.takeError() will be // evaluated even if the assert does not fail, leaving new_member in an // indeterminate state. internal_error << llvm::toString(obj_or_err.takeError()) << "\n"; } llvm::object::SymbolicFile &obj = *obj_or_err.get(); for (const auto &sym : obj.symbols()) { auto flags = sym.getFlags(); if (!flags) { internal_error << llvm::toString(flags.takeError()) << "\n"; } const uint32_t sym_flags = flags.get(); if (sym_flags & llvm::object::SymbolRef::SF_FormatSpecific) { continue; } if (!(sym_flags & llvm::object::SymbolRef::SF_Global)) { continue; } if ((sym_flags & llvm::object::SymbolRef::SF_Undefined) && !(sym_flags & llvm::object::SymbolRef::SF_Indirect)) { continue; } // Windows COFF doesn't support weak symbols. if (sym_flags & llvm::object::SymbolRef::SF_Weak) { continue; } llvm::SmallString<128> symbols_buf; llvm::raw_svector_ostream symbols(symbols_buf); auto err = sym.printName(symbols); internal_assert(!err); std::string name = symbols.str().str(); if (name_to_member_index.find(name) != name_to_member_index.end()) { user_warning << "Warning: symbol '" << name << "' seen multiple times in library.\n"; continue; } name_to_member_index[name] = i; } } size_t header_start_offset = emit_padded(out, "/", 16); size_t symbol_table_size_offset = finish_member_header(out, kPatchLater); // size of symbol table size_t symbol_count_offset = 0; if (windows_coff_format) { emit_u32(out, members.size()); for (size_t i = 0, n = members.size(); i < n; ++i) { size_t pos = out.tellp(); emit_u32(out, kPatchLater); // offset to this .obj member (*patchers)[i].push_back({emit_u32, pos}); } symbol_count_offset = out.tellp(); emit_u32(out, kPatchLater); // number of symbols // symbol-to-archive-member-index, but 1-based rather than zero-based. for (auto &it : name_to_member_index) { internal_assert(it.second <= 65534); emit_little_endian_u16(out, (uint16_t)it.second + 1); } } else { symbol_count_offset = out.tellp(); emit_u32(out, kPatchLater); // number of symbols for (auto &it : name_to_member_index) { size_t pos = out.tellp(); emit_u32(out, kPatchLater); // offset to the .obj member containing this symbol (*patchers)[it.second].push_back({emit_u32, pos}); } } // Symbol table goes at the end for both variants. for (auto &it : name_to_member_index) { out << it.first; out.put('\0'); } size_t member_end = out.tellp(); // lib.exe pads to 2-byte align with 0x0a if (out.tellp() % 2) { out.put('\x0A'); } size_t final_offset = out.tellp(); // Patch the size of the symbol table. const size_t member_header_size = 60; out.seekp(symbol_table_size_offset); emit_padded(out, member_end - member_header_size - header_start_offset, 10); // Patch the number of symbols. out.seekp(symbol_count_offset); emit_u32(out, name_to_member_index.size()); // Seek back to where we left off. out.seekp(final_offset); } void write_coff_archive(std::ostream &out, const std::vector &members) { out << "!\x0A"; // First member is named "/" and is the traditional symbol table, // with big-endian offsets. std::map> patchers; write_symbol_table(out, members, false, &patchers); // Second member (for Windows COFF) is also named "/" and is also a symbol table, // but with little-endian offsets and with symbols sorted by name. (We actually sort // both tables as a side-effect, but the first isn't required to be sorted.) write_symbol_table(out, members, true, &patchers); // Third member, named "//", is the optional string table. (MS docs say it is required but // lib.exe only emits as needed, so we will follow its example) std::map string_to_offset_map = write_string_table(out, members); // The remaining members are just (header + contents of .obj file). std::vector member_offset; for (const llvm::NewArchiveMember &m : members) { size_t pos = out.tellp(); member_offset.push_back(pos); std::string name = member_name(m); auto it = string_to_offset_map.find(name); if (it != string_to_offset_map.end()) { out.put('/'); emit_padded(out, it->second, 15); } else { emit_padded(out, name + "/", 16); } size_t size = m.Buf->getBufferSize(); finish_member_header(out, size); out << m.Buf->getMemBufferRef().getBuffer().str(); if (out.tellp() % 2) { out.put('\x0A'); } } for (auto &it : patchers) { size_t i = it.first; for (auto &patcher : it.second) { out.seekp(patcher.pos); patcher.emit_u32(out, member_offset.at(i)); } } } } // namespace } // namespace Archive } // namespace Internal std::unique_ptr make_raw_fd_ostream(const std::string &filename) { std::string error_string; std::error_code err; std::unique_ptr raw_out(new llvm::raw_fd_ostream(filename, err, llvm::sys::fs::OF_None)); if (err) { error_string = err.message(); } internal_assert(error_string.empty()) << "Error opening output " << filename << ": " << error_string << "\n"; return raw_out; } namespace { // llvm::CloneModule has issues with debug info. As a workaround, // serialize it to bitcode in memory, and then parse the bitcode back in. std::unique_ptr clone_module(const llvm::Module &module_in) { Internal::debug(2) << "Cloning module " << module_in.getName().str() << "\n"; // Write the module to a buffer. llvm::SmallVector clone_buffer; llvm::raw_svector_ostream clone_ostream(clone_buffer); WriteBitcodeToFile(module_in, clone_ostream); // Read it back in. llvm::MemoryBufferRef buffer_ref(llvm::StringRef(clone_buffer.data(), clone_buffer.size()), "clone_buffer"); auto cloned_module = llvm::parseBitcodeFile(buffer_ref, module_in.getContext()); internal_assert(cloned_module); return std::move(cloned_module.get()); } } // namespace void emit_file(const llvm::Module &module_in, Internal::LLVMOStream &out, llvm::CodeGenFileType file_type) { Internal::debug(1) << "emit_file.Compiling to native code...\n"; Internal::debug(2) << "Target triple: " << module_in.getTargetTriple() << "\n"; auto time_start = std::chrono::high_resolution_clock::now(); // Work on a copy of the module to avoid modifying the original. std::unique_ptr module = clone_module(module_in); // Get the target specific parser. auto target_machine = Internal::make_target_machine(*module); internal_assert(target_machine.get()) << "Could not allocate target machine!\n"; llvm::DataLayout target_data_layout(target_machine->createDataLayout()); if (!(target_data_layout == module->getDataLayout())) { internal_error << "Warning: module's data layout does not match target machine's\n" << target_data_layout.getStringRepresentation() << "\n" << module->getDataLayout().getStringRepresentation() << "\n"; } // Build up all of the passes that we want to do to the module. // NOTE: use of the "legacy" PassManager here is still required; it is deprecated // for optimization, but is still the only complete API for codegen as of work-in-progress // LLVM14. At the time of this comment (Dec 2021), there is no firm plan as to when codegen will // be fully available in the new PassManager, so don't worry about this 'legacy' // tag until there's any indication that the old APIs start breaking. // // See: // https://lists.llvm.org/pipermail/llvm-dev/2021-April/150100.html // https://releases.llvm.org/13.0.0/docs/ReleaseNotes.html#changes-to-the-llvm-ir // https://groups.google.com/g/llvm-dev/c/HoS07gXx0p8 llvm::legacy::PassManager pass_manager; pass_manager.add(new llvm::TargetLibraryInfoWrapperPass(llvm::Triple(module->getTargetTriple()))); // Make sure things marked as always-inline get inlined pass_manager.add(llvm::createAlwaysInlinerLegacyPass()); #if LLVM_VERSION < 170 // Remove any stale debug info // // Note: this pass was added in https://github.com/halide/Halide/pull/2060; // based on the comments, it looks like it was an attempt to fix an error, // but didn't actually fix it, and (apparently) just got left in? // // There is a 'new' equivalent that we could add in the optimization pass // in Codegen_LLVM.cpp, but since this seems to be have added in error, // we're just going to elide it for LLVM >= 17.0 pass_manager.add(llvm::createStripDeadDebugInfoPass()); // Enable symbol rewriting. This allows code outside libHalide to // use symbol rewriting when compiling Halide code (for example, by // using cl::ParseCommandLineOption and then passing the appropriate // rewrite options via -mllvm flags). pass_manager.add(llvm::createRewriteSymbolsPass()); #endif if (target_machine->isPositionIndependent()) { Internal::debug(1) << "Target machine is Position Independent!\n"; } // Override default to generate verbose assembly. target_machine->Options.MCOptions.AsmVerbose = true; // Ask the target to add backend passes as necessary. target_machine->addPassesToEmitFile(pass_manager, out, nullptr, file_type); pass_manager.run(*module); auto *logger = Internal::get_compiler_logger(); if (logger) { auto time_end = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = time_end - time_start; logger->record_compilation_time(Internal::CompilerLogger::Phase::LLVM, diff.count()); } // If -time-passes is in HL_LLVM_ARGS, this will print llvm passes time statstics otherwise its no-op. llvm::reportAndResetTimings(); } std::unique_ptr compile_module_to_llvm_module(const Module &module, llvm::LLVMContext &context) { return codegen_llvm(module, context); } void compile_llvm_module_to_object(llvm::Module &module, Internal::LLVMOStream &out) { #if LLVM_VERSION >= 180 emit_file(module, out, llvm::CodeGenFileType::ObjectFile); #else emit_file(module, out, llvm::CGFT_ObjectFile); #endif } void compile_llvm_module_to_assembly(llvm::Module &module, Internal::LLVMOStream &out) { #if LLVM_VERSION >= 180 emit_file(module, out, llvm::CodeGenFileType::AssemblyFile); #else emit_file(module, out, llvm::CGFT_AssemblyFile); #endif } void compile_llvm_module_to_llvm_bitcode(llvm::Module &module, Internal::LLVMOStream &out) { WriteBitcodeToFile(module, out); } void compile_llvm_module_to_llvm_assembly(llvm::Module &module, Internal::LLVMOStream &out) { module.print(out, nullptr); } // Note that the utilities for get/set working directory are deliberately *not* in Util.h; // generally speaking, you shouldn't ever need or want to do this, and doing so is asking for // trouble. This exists solely to work around an issue with LLVM, hence its restricted // location. If we ever legitimately need this elsewhere, consider moving it to Util.h. namespace { std::string get_current_directory() { #ifdef _WIN32 DWORD dir_buf_size = GetCurrentDirectoryW(0, nullptr); internal_assert(dir_buf_size) << "GetCurrentDirectoryW() failed; error " << GetLastError() << "\n"; // GetCurrentDirectoryW returns a _buffer size_, not a character count. // std::wstring null-terminates on its own, so don't count that here. std::wstring wdir(dir_buf_size - 1, 0); DWORD ret = GetCurrentDirectoryW(dir_buf_size, &wdir[0]); internal_assert(ret) << "GetCurrentDirectoryW() failed; error " << GetLastError() << "\n"; int dir_len = WideCharToMultiByte(CP_UTF8, 0, &wdir[0], (int)wdir.size(), nullptr, 0, nullptr, nullptr); internal_assert(dir_len) << "WideCharToMultiByte() failed; error " << GetLastError() << "\n"; std::string dir(dir_len, 0); ret = WideCharToMultiByte(CP_UTF8, 0, &wdir[0], (int)wdir.size(), &dir[0], (int)dir.size(), nullptr, nullptr); internal_assert(ret) << "WideCharToMultiByte() failed; error " << GetLastError() << "\n"; return dir; #else std::string dir; // Note that passing null for the first arg isn't strictly POSIX, but is // supported everywhere we currently build. char *p = getcwd(nullptr, 0); internal_assert(p != nullptr) << "getcwd() failed"; dir = p; free(p); return dir; #endif } void set_current_directory(const std::string &d) { #ifdef _WIN32 int n_chars = MultiByteToWideChar(CP_UTF8, 0, &d[0], (int)d.size(), nullptr, 0); internal_assert(n_chars) << "MultiByteToWideChar() failed; error " << GetLastError() << "\n"; std::wstring wd(n_chars, 0); int ret = MultiByteToWideChar(CP_UTF8, 0, &d[0], (int)d.size(), &wd[0], wd.size()); internal_assert(ret) << "MultiByteToWideChar() failed; error " << GetLastError() << "\n"; internal_assert(SetCurrentDirectoryW(wd.c_str())) << "SetCurrentDirectoryW() failed; error " << GetLastError() << "\n"; #else internal_assert(chdir(d.c_str()) == 0) << "chdir() failed"; #endif } std::pair dir_and_file(const std::string &path) { std::string dir, file; size_t slash_pos = path.rfind('/'); #ifdef _WIN32 if (slash_pos == std::string::npos) { // Windows is a thing slash_pos = path.rfind('\\'); } #endif if (slash_pos != std::string::npos) { dir = path.substr(0, slash_pos); file = path.substr(slash_pos + 1); } else { file = path; } return {dir, file}; } std::string make_absolute_path(const std::string &path) { bool is_absolute = !path.empty() && path[0] == '/'; char sep = '/'; #ifdef _WIN32 // Allow for C:\whatever or c:/whatever on Windows if (path.size() >= 3 && path[1] == ':' && (path[2] == '\\' || path[2] == '/')) { is_absolute = true; sep = path[2]; } else if (path.size() > 2 && path[0] == '\\' && path[1] == '\\') { // Also allow for UNC-style paths beginning with double-backslash is_absolute = true; sep = path[0]; } #endif if (!is_absolute) { return get_current_directory() + sep + path; } return path; } struct SetCwd { const std::string original_directory; explicit SetCwd(const std::string &d) : original_directory(get_current_directory()) { if (!d.empty()) { set_current_directory(d); } } ~SetCwd() { set_current_directory(original_directory); } }; } // namespace void create_static_library(const std::vector &src_files_in, const Target &target, const std::string &dst_file_in, bool deterministic) { internal_assert(!src_files_in.empty()); // Ensure that dst_file is an absolute path, since we're going to change the // working directory temporarily. std::string dst_file = make_absolute_path(dst_file_in); // If we give absolute paths to LLVM, it will dutifully embed them in the resulting // .a file; some versions of 'ar x' are unable to deal with the resulting files, // which is inconvenient. So let's doctor the inputs to be simple filenames, // and temporarily change the working directory. (Note that this requires all the // input files be in the same directory; this is currently always the case for // our existing usage.) std::string src_dir = dir_and_file(src_files_in.front()).first; std::vector src_files; for (const auto &s_in : src_files_in) { auto df = dir_and_file(s_in); internal_assert(df.first == src_dir) << "All inputs to create_static_library() must be in the same directory"; for (auto &s_existing : src_files) { internal_assert(s_existing != df.second) << "create_static_library() does not allow duplicate filenames."; } src_files.push_back(df.second); } SetCwd set_cwd(src_dir); std::vector new_members; for (auto &src : src_files) { llvm::Expected new_member = llvm::NewArchiveMember::getFile(src, /*Deterministic=*/true); if (!new_member) { // Don't use internal_assert: the call to new_member.takeError() will be evaluated // even if the assert does not fail, leaving new_member in an indeterminate // state. internal_error << src << ": " << llvm::toString(new_member.takeError()) << "\n"; } new_members.push_back(std::move(*new_member)); } // LLVM can't write MS PE/COFF Lib format, which is almost-but-not-quite // the same as GNU ar format. if (Internal::get_triple_for_target(target).isWindowsMSVCEnvironment()) { std::ofstream f(dst_file, std::ios_base::trunc | std::ios_base::binary); Internal::Archive::write_coff_archive(f, new_members); f.flush(); f.close(); return; } #if LLVM_VERSION >= 180 const llvm::SymtabWritingMode write_symtab = llvm::SymtabWritingMode::NormalSymtab; #else const bool write_symtab = true; #endif const auto kind = Internal::get_triple_for_target(target).isOSDarwin() ? llvm::object::Archive::K_BSD : llvm::object::Archive::K_GNU; const bool thin = false; auto result = llvm::writeArchive(dst_file, new_members, write_symtab, kind, deterministic, thin, nullptr); internal_assert(!result) << "Failed to write archive: " << dst_file << ", reason: " << llvm::toString(std::move(result)) << "\n"; } } // namespace Halide