diff options
Diffstat (limited to 'contrib/llvm/lib/LTO')
-rw-r--r-- | contrib/llvm/lib/LTO/Caching.cpp | 99 | ||||
-rw-r--r-- | contrib/llvm/lib/LTO/LTO.cpp | 868 | ||||
-rw-r--r-- | contrib/llvm/lib/LTO/LTOBackend.cpp | 375 | ||||
-rw-r--r-- | contrib/llvm/lib/LTO/LTOCodeGenerator.cpp | 122 | ||||
-rw-r--r-- | contrib/llvm/lib/LTO/LTOModule.cpp | 142 | ||||
-rw-r--r-- | contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 374 | ||||
-rw-r--r-- | contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp | 48 |
7 files changed, 1775 insertions, 253 deletions
diff --git a/contrib/llvm/lib/LTO/Caching.cpp b/contrib/llvm/lib/LTO/Caching.cpp new file mode 100644 index 0000000..fd5bdb0 --- /dev/null +++ b/contrib/llvm/lib/LTO/Caching.cpp @@ -0,0 +1,99 @@ +//===-Caching.cpp - LLVM Link Time Optimizer Cache Handling ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Caching for ThinLTO. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/Caching.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::lto; + +static void commitEntry(StringRef TempFilename, StringRef EntryPath) { + // Rename to final destination (hopefully race condition won't matter here) + auto EC = sys::fs::rename(TempFilename, EntryPath); + if (EC) { + // Renaming failed, probably not the same filesystem, copy and delete. + // FIXME: Avoid needing to do this by creating the temporary file in the + // cache directory. + { + auto ReloadedBufferOrErr = MemoryBuffer::getFile(TempFilename); + if (auto EC = ReloadedBufferOrErr.getError()) + report_fatal_error(Twine("Failed to open temp file '") + TempFilename + + "': " + EC.message() + "\n"); + + raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None); + if (EC) + report_fatal_error(Twine("Failed to open ") + EntryPath + + " to save cached entry\n"); + // I'm not sure what are the guarantee if two processes are doing this + // at the same time. + OS << (*ReloadedBufferOrErr)->getBuffer(); + } + sys::fs::remove(TempFilename); + } +} + +NativeObjectCache lto::localCache(std::string CacheDirectoryPath, + AddFileFn AddFile) { + return [=](unsigned Task, StringRef Key) -> AddStreamFn { + // First, see if we have a cache hit. + SmallString<64> EntryPath; + sys::path::append(EntryPath, CacheDirectoryPath, Key); + if (sys::fs::exists(EntryPath)) { + AddFile(Task, EntryPath); + return AddStreamFn(); + } + + // This native object stream is responsible for commiting the resulting + // file to the cache and calling AddFile to add it to the link. + struct CacheStream : NativeObjectStream { + AddFileFn AddFile; + std::string TempFilename; + std::string EntryPath; + unsigned Task; + + CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddFileFn AddFile, + std::string TempFilename, std::string EntryPath, + unsigned Task) + : NativeObjectStream(std::move(OS)), AddFile(AddFile), + TempFilename(TempFilename), EntryPath(EntryPath), Task(Task) {} + + ~CacheStream() { + // Make sure the file is closed before committing it. + OS.reset(); + commitEntry(TempFilename, EntryPath); + AddFile(Task, EntryPath); + } + }; + + return [=](size_t Task) -> std::unique_ptr<NativeObjectStream> { + // Write to a temporary to avoid race condition + int TempFD; + SmallString<64> TempFilename; + std::error_code EC = + sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename); + if (EC) { + errs() << "Error: " << EC.message() << "\n"; + report_fatal_error("ThinLTO: Can't get a temporary file"); + } + + // This CacheStream will move the temporary file into the cache when done. + return llvm::make_unique<CacheStream>( + llvm::make_unique<raw_fd_ostream>(TempFD, /* ShouldClose */ true), + AddFile, TempFilename.str(), EntryPath.str(), Task); + }; + }; +} diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp index 10226c4..e3e2f9f 100644 --- a/contrib/llvm/lib/LTO/LTO.cpp +++ b/contrib/llvm/lib/LTO/LTO.cpp @@ -12,32 +12,130 @@ //===----------------------------------------------------------------------===// #include "llvm/LTO/LTO.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/IR/AutoUpgrade.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/LTO/LTOBackend.h" +#include "llvm/Linker/IRMover.h" +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/SplitModule.h" -namespace llvm { +#include <set> -// Simple helper to load a module from bitcode -std::unique_ptr<Module> loadModuleFromBuffer(const MemoryBufferRef &Buffer, - LLVMContext &Context, bool Lazy) { - SMDiagnostic Err; - ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr); - if (Lazy) { - ModuleOrErr = - getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context, - /* ShouldLazyLoadMetadata */ Lazy); - } else { - ModuleOrErr = parseBitcodeFile(Buffer, Context); +using namespace llvm; +using namespace lto; +using namespace object; + +#define DEBUG_TYPE "lto" + +// Returns a unique hash for the Module considering the current list of +// export/import and other global analysis results. +// The hash is produced in \p Key. +static void computeCacheKey( + SmallString<40> &Key, const Config &Conf, const ModuleSummaryIndex &Index, + StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + const GVSummaryMapTy &DefinedGlobals) { + // Compute the unique hash for this entry. + // This is based on the current compiler version, the module itself, the + // export list, the hash for every single module in the import list, the + // list of ResolvedODR for the module, and the list of preserved symbols. + SHA1 Hasher; + + // Start with the compiler revision + Hasher.update(LLVM_VERSION_STRING); +#ifdef HAVE_LLVM_REVISION + Hasher.update(LLVM_REVISION); +#endif + + // Include the parts of the LTO configuration that affect code generation. + auto AddString = [&](StringRef Str) { + Hasher.update(Str); + Hasher.update(ArrayRef<uint8_t>{0}); + }; + auto AddUnsigned = [&](unsigned I) { + uint8_t Data[4]; + Data[0] = I; + Data[1] = I >> 8; + Data[2] = I >> 16; + Data[3] = I >> 24; + Hasher.update(ArrayRef<uint8_t>{Data, 4}); + }; + AddString(Conf.CPU); + // FIXME: Hash more of Options. For now all clients initialize Options from + // command-line flags (which is unsupported in production), but may set + // RelaxELFRelocations. The clang driver can also pass FunctionSections, + // DataSections and DebuggerTuning via command line flags. + AddUnsigned(Conf.Options.RelaxELFRelocations); + AddUnsigned(Conf.Options.FunctionSections); + AddUnsigned(Conf.Options.DataSections); + AddUnsigned((unsigned)Conf.Options.DebuggerTuning); + for (auto &A : Conf.MAttrs) + AddString(A); + AddUnsigned(Conf.RelocModel); + AddUnsigned(Conf.CodeModel); + AddUnsigned(Conf.CGOptLevel); + AddUnsigned(Conf.OptLevel); + AddString(Conf.OptPipeline); + AddString(Conf.AAPipeline); + AddString(Conf.OverrideTriple); + AddString(Conf.DefaultTriple); + + // Include the hash for the current module + auto ModHash = Index.getModuleHash(ModuleID); + Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); + for (auto F : ExportList) + // The export list can impact the internalization, be conservative here + Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F))); + + // Include the hash for every module we import functions from + for (auto &Entry : ImportList) { + auto ModHash = Index.getModuleHash(Entry.first()); + Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); + } + + // Include the hash for the resolved ODR. + for (auto &Entry : ResolvedODR) { + Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first, + sizeof(GlobalValue::GUID))); + Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second, + sizeof(GlobalValue::LinkageTypes))); + } + + // Include the hash for the linkage type to reflect internalization and weak + // resolution. + for (auto &GS : DefinedGlobals) { + GlobalValue::LinkageTypes Linkage = GS.second->linkage(); + Hasher.update( + ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage))); } - if (std::error_code EC = ModuleOrErr.getError()) { - Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error, - EC.message()); - Err.print("ThinLTO", errs()); - report_fatal_error("Can't load module, abort."); + + if (!Conf.SampleProfile.empty()) { + auto FileOrErr = MemoryBuffer::getFile(Conf.SampleProfile); + if (FileOrErr) + Hasher.update(FileOrErr.get()->getBuffer()); } - return std::move(ModuleOrErr.get()); + + Key = toHex(Hasher.result()); } static void thinLTOResolveWeakForLinkerGUID( @@ -48,20 +146,25 @@ static void thinLTOResolveWeakForLinkerGUID( function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)> recordNewLinkage) { for (auto &S : GVSummaryList) { - if (GlobalInvolvedWithAlias.count(S.get())) - continue; GlobalValue::LinkageTypes OriginalLinkage = S->linkage(); if (!GlobalValue::isWeakForLinker(OriginalLinkage)) continue; // We need to emit only one of these. The prevailing module will keep it, // but turned into a weak, while the others will drop it when possible. + // This is both a compile-time optimization and a correctness + // transformation. This is necessary for correctness when we have exported + // a reference - we need to convert the linkonce to weak to + // ensure a copy is kept to satisfy the exported reference. + // FIXME: We may want to split the compile time and correctness + // aspects into separate routines. if (isPrevailing(GUID, S.get())) { if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) S->setLinkage(GlobalValue::getWeakLinkage( GlobalValue::isLinkOnceODRLinkage(OriginalLinkage))); } - // Alias can't be turned into available_externally. + // Alias and aliasee can't be turned into available_externally. else if (!isa<AliasSummary>(S.get()) && + !GlobalInvolvedWithAlias.count(S.get()) && (GlobalValue::isLinkOnceODRLinkage(OriginalLinkage) || GlobalValue::isWeakODRLinkage(OriginalLinkage))) S->setLinkage(GlobalValue::AvailableExternallyLinkage); @@ -76,7 +179,7 @@ static void thinLTOResolveWeakForLinkerGUID( // current module. However there is a chance that another module is still // referencing them because of the import. We make sure we always emit at least // one copy. -void thinLTOResolveWeakForLinkerInIndex( +void llvm::thinLTOResolveWeakForLinkerInIndex( ModuleSummaryIndex &Index, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing, @@ -110,10 +213,727 @@ static void thinLTOInternalizeAndPromoteGUID( // Update the linkages in the given \p Index to mark exported values // as external and non-exported values as internal. -void thinLTOInternalizeAndPromoteInIndex( +void llvm::thinLTOInternalizeAndPromoteInIndex( ModuleSummaryIndex &Index, function_ref<bool(StringRef, GlobalValue::GUID)> isExported) { for (auto &I : Index) thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported); } + +struct InputFile::InputModule { + BitcodeModule BM; + std::unique_ptr<Module> Mod; + + // The range of ModuleSymbolTable entries for this input module. + size_t SymBegin, SymEnd; +}; + +// Requires a destructor for std::vector<InputModule>. +InputFile::~InputFile() = default; + +Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) { + std::unique_ptr<InputFile> File(new InputFile); + + ErrorOr<MemoryBufferRef> BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected<std::vector<BitcodeModule>> BMsOrErr = + getBitcodeModuleList(*BCOrErr); + if (!BMsOrErr) + return BMsOrErr.takeError(); + + if (BMsOrErr->empty()) + return make_error<StringError>("Bitcode file does not contain any modules", + inconvertibleErrorCode()); + + // Create an InputModule for each module in the InputFile, and add it to the + // ModuleSymbolTable. + for (auto BM : *BMsOrErr) { + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(File->Ctx, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + size_t SymBegin = File->SymTab.symbols().size(); + File->SymTab.addModule(MOrErr->get()); + size_t SymEnd = File->SymTab.symbols().size(); + + for (const auto &C : (*MOrErr)->getComdatSymbolTable()) { + auto P = File->ComdatMap.insert( + std::make_pair(&C.second, File->Comdats.size())); + assert(P.second); + (void)P; + File->Comdats.push_back(C.first()); + } + + File->Mods.push_back({BM, std::move(*MOrErr), SymBegin, SymEnd}); + } + + return std::move(File); +} + +Expected<int> InputFile::Symbol::getComdatIndex() const { + if (!isGV()) + return -1; + const GlobalObject *GO = getGV()->getBaseObject(); + if (!GO) + return make_error<StringError>("Unable to determine comdat of alias!", + inconvertibleErrorCode()); + if (const Comdat *C = GO->getComdat()) { + auto I = File->ComdatMap.find(C); + assert(I != File->ComdatMap.end()); + return I->second; + } + return -1; +} + +StringRef InputFile::getName() const { + return Mods[0].BM.getModuleIdentifier(); +} + +StringRef InputFile::getSourceFileName() const { + return Mods[0].Mod->getSourceFileName(); +} + +iterator_range<InputFile::symbol_iterator> +InputFile::module_symbols(InputModule &IM) { + return llvm::make_range( + symbol_iterator(SymTab.symbols().data() + IM.SymBegin, SymTab, this), + symbol_iterator(SymTab.symbols().data() + IM.SymEnd, SymTab, this)); +} + +LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, + Config &Conf) + : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel), + Ctx(Conf) {} + +LTO::ThinLTOState::ThinLTOState(ThinBackend Backend) : Backend(Backend) { + if (!Backend) + this->Backend = + createInProcessThinBackend(llvm::heavyweight_hardware_concurrency()); +} + +LTO::LTO(Config Conf, ThinBackend Backend, + unsigned ParallelCodeGenParallelismLevel) + : Conf(std::move(Conf)), + RegularLTO(ParallelCodeGenParallelismLevel, this->Conf), + ThinLTO(std::move(Backend)) {} + +// Requires a destructor for MapVector<BitcodeModule>. +LTO::~LTO() = default; + +// Add the given symbol to the GlobalResolutions map, and resolve its partition. +void LTO::addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used, + const InputFile::Symbol &Sym, + SymbolResolution Res, unsigned Partition) { + GlobalValue *GV = Sym.isGV() ? Sym.getGV() : nullptr; + + auto &GlobalRes = GlobalResolutions[Sym.getName()]; + if (GV) { + GlobalRes.UnnamedAddr &= GV->hasGlobalUnnamedAddr(); + if (Res.Prevailing) + GlobalRes.IRName = GV->getName(); + } + // Set the partition to external if we know it is used elsewhere, e.g. + // it is visible to a regular object, is referenced from llvm.compiler_used, + // or was already recorded as being referenced from a different partition. + if (Res.VisibleToRegularObj || (GV && Used.count(GV)) || + (GlobalRes.Partition != GlobalResolution::Unknown && + GlobalRes.Partition != Partition)) { + GlobalRes.Partition = GlobalResolution::External; + } else + // First recorded reference, save the current partition. + GlobalRes.Partition = Partition; + + // Flag as visible outside of ThinLTO if visible from a regular object or + // if this is a reference in the regular LTO partition. + GlobalRes.VisibleOutsideThinLTO |= + (Res.VisibleToRegularObj || (Partition == GlobalResolution::RegularLTO)); +} + +static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, + ArrayRef<SymbolResolution> Res) { + StringRef Path = Input->getName(); + OS << Path << '\n'; + auto ResI = Res.begin(); + for (const InputFile::Symbol &Sym : Input->symbols()) { + assert(ResI != Res.end()); + SymbolResolution Res = *ResI++; + + OS << "-r=" << Path << ',' << Sym.getName() << ','; + if (Res.Prevailing) + OS << 'p'; + if (Res.FinalDefinitionInLinkageUnit) + OS << 'l'; + if (Res.VisibleToRegularObj) + OS << 'x'; + OS << '\n'; + } + assert(ResI == Res.end()); +} + +Error LTO::add(std::unique_ptr<InputFile> Input, + ArrayRef<SymbolResolution> Res) { + assert(!CalledGetMaxTasks); + + if (Conf.ResolutionFile) + writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res); + + const SymbolResolution *ResI = Res.begin(); + for (InputFile::InputModule &IM : Input->Mods) + if (Error Err = addModule(*Input, IM, ResI, Res.end())) + return Err; + + assert(ResI == Res.end()); + return Error::success(); +} + +Error LTO::addModule(InputFile &Input, InputFile::InputModule &IM, + const SymbolResolution *&ResI, + const SymbolResolution *ResE) { + // FIXME: move to backend + Module &M = *IM.Mod; + + if (M.getDataLayoutStr().empty()) + return make_error<StringError>("input module has no datalayout", + inconvertibleErrorCode()); + + if (!Conf.OverrideTriple.empty()) + M.setTargetTriple(Conf.OverrideTriple); + else if (M.getTargetTriple().empty()) + M.setTargetTriple(Conf.DefaultTriple); + + Expected<bool> HasThinLTOSummary = IM.BM.hasSummary(); + if (!HasThinLTOSummary) + return HasThinLTOSummary.takeError(); + + if (*HasThinLTOSummary) + return addThinLTO(IM.BM, M, Input.module_symbols(IM), ResI, ResE); + else + return addRegularLTO(IM.BM, ResI, ResE); +} + +// Add a regular LTO object to the link. +Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, + const SymbolResolution *ResE) { + if (!RegularLTO.CombinedModule) { + RegularLTO.CombinedModule = + llvm::make_unique<Module>("ld-temp.o", RegularLTO.Ctx); + RegularLTO.Mover = llvm::make_unique<IRMover>(*RegularLTO.CombinedModule); + } + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Module &M = **MOrErr; + if (Error Err = M.materializeMetadata()) + return Err; + UpgradeDebugInfo(M); + + ModuleSymbolTable SymTab; + SymTab.addModule(&M); + + SmallPtrSet<GlobalValue *, 8> Used; + collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); + + std::vector<GlobalValue *> Keep; + + for (GlobalVariable &GV : M.globals()) + if (GV.hasAppendingLinkage()) + Keep.push_back(&GV); + + for (const InputFile::Symbol &Sym : + make_range(InputFile::symbol_iterator(SymTab.symbols().begin(), SymTab, + nullptr), + InputFile::symbol_iterator(SymTab.symbols().end(), SymTab, + nullptr))) { + assert(ResI != ResE); + SymbolResolution Res = *ResI++; + addSymbolToGlobalRes(Used, Sym, Res, 0); + + if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined) + continue; + if (Res.Prevailing && Sym.isGV()) { + GlobalValue *GV = Sym.getGV(); + Keep.push_back(GV); + switch (GV->getLinkage()) { + default: + break; + case GlobalValue::LinkOnceAnyLinkage: + GV->setLinkage(GlobalValue::WeakAnyLinkage); + break; + case GlobalValue::LinkOnceODRLinkage: + GV->setLinkage(GlobalValue::WeakODRLinkage); + break; + } + } + // Common resolution: collect the maximum size/alignment over all commons. + // We also record if we see an instance of a common as prevailing, so that + // if none is prevailing we can ignore it later. + if (Sym.getFlags() & object::BasicSymbolRef::SF_Common) { + // FIXME: We should figure out what to do about commons defined by asm. + // For now they aren't reported correctly by ModuleSymbolTable. + auto &CommonRes = RegularLTO.Commons[Sym.getGV()->getName()]; + CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize()); + CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment()); + CommonRes.Prevailing |= Res.Prevailing; + } + + // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit. + } + + return RegularLTO.Mover->move(std::move(*MOrErr), Keep, + [](GlobalValue &, IRMover::ValueAdder) {}, + /* LinkModuleInlineAsm */ true, + /* IsPerformingImport */ false); +} + +// Add a ThinLTO object to the link. +// FIXME: This function should not need to take as many parameters once we have +// a bitcode symbol table. +Error LTO::addThinLTO(BitcodeModule BM, Module &M, + iterator_range<InputFile::symbol_iterator> Syms, + const SymbolResolution *&ResI, + const SymbolResolution *ResE) { + SmallPtrSet<GlobalValue *, 8> Used; + collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); + + Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr = BM.getSummary(); + if (!SummaryOrErr) + return SummaryOrErr.takeError(); + ThinLTO.CombinedIndex.mergeFrom(std::move(*SummaryOrErr), + ThinLTO.ModuleMap.size()); + + for (const InputFile::Symbol &Sym : Syms) { + assert(ResI != ResE); + SymbolResolution Res = *ResI++; + addSymbolToGlobalRes(Used, Sym, Res, ThinLTO.ModuleMap.size() + 1); + + if (Res.Prevailing && Sym.isGV()) + ThinLTO.PrevailingModuleForGUID[Sym.getGV()->getGUID()] = + BM.getModuleIdentifier(); + } + + if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second) + return make_error<StringError>( + "Expected at most one ThinLTO module per bitcode file", + inconvertibleErrorCode()); + + return Error::success(); +} + +unsigned LTO::getMaxTasks() const { + CalledGetMaxTasks = true; + return RegularLTO.ParallelCodeGenParallelismLevel + ThinLTO.ModuleMap.size(); +} + +Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { + // Save the status of having a regularLTO combined module, as + // this is needed for generating the ThinLTO Task ID, and + // the CombinedModule will be moved at the end of runRegularLTO. + bool HasRegularLTO = RegularLTO.CombinedModule != nullptr; + // Invoke regular LTO if there was a regular LTO module to start with. + if (HasRegularLTO) + if (auto E = runRegularLTO(AddStream)) + return E; + return runThinLTO(AddStream, Cache, HasRegularLTO); +} + +Error LTO::runRegularLTO(AddStreamFn AddStream) { + // Make sure commons have the right size/alignment: we kept the largest from + // all the prevailing when adding the inputs, and we apply it here. + const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout(); + for (auto &I : RegularLTO.Commons) { + if (!I.second.Prevailing) + // Don't do anything if no instance of this common was prevailing. + continue; + GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(I.first); + if (OldGV && DL.getTypeAllocSize(OldGV->getValueType()) == I.second.Size) { + // Don't create a new global if the type is already correct, just make + // sure the alignment is correct. + OldGV->setAlignment(I.second.Align); + continue; + } + ArrayType *Ty = + ArrayType::get(Type::getInt8Ty(RegularLTO.Ctx), I.second.Size); + auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false, + GlobalValue::CommonLinkage, + ConstantAggregateZero::get(Ty), ""); + GV->setAlignment(I.second.Align); + if (OldGV) { + OldGV->replaceAllUsesWith(ConstantExpr::getBitCast(GV, OldGV->getType())); + GV->takeName(OldGV); + OldGV->eraseFromParent(); + } else { + GV->setName(I.first); + } + } + + if (Conf.PreOptModuleHook && + !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule)) + return Error::success(); + + if (!Conf.CodeGenOnly) { + for (const auto &R : GlobalResolutions) { + if (R.second.IRName.empty()) + continue; + if (R.second.Partition != 0 && + R.second.Partition != GlobalResolution::External) + continue; + + GlobalValue *GV = + RegularLTO.CombinedModule->getNamedValue(R.second.IRName); + // Ignore symbols defined in other partitions. + if (!GV || GV->hasLocalLinkage()) + continue; + GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global + : GlobalValue::UnnamedAddr::None); + if (R.second.Partition == 0) + GV->setLinkage(GlobalValue::InternalLinkage); + } + + if (Conf.PostInternalizeModuleHook && + !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) + return Error::success(); + } + return backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel, + std::move(RegularLTO.CombinedModule)); +} + +/// This class defines the interface to the ThinLTO backend. +class lto::ThinBackendProc { +protected: + Config &Conf; + ModuleSummaryIndex &CombinedIndex; + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries; + +public: + ThinBackendProc(Config &Conf, ModuleSummaryIndex &CombinedIndex, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries) + : Conf(Conf), CombinedIndex(CombinedIndex), + ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {} + + virtual ~ThinBackendProc() {} + virtual Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + MapVector<StringRef, BitcodeModule> &ModuleMap) = 0; + virtual Error wait() = 0; +}; + +namespace { +class InProcessThinBackend : public ThinBackendProc { + ThreadPool BackendThreadPool; + AddStreamFn AddStream; + NativeObjectCache Cache; + + Optional<Error> Err; + std::mutex ErrMu; + +public: + InProcessThinBackend( + Config &Conf, ModuleSummaryIndex &CombinedIndex, + unsigned ThinLTOParallelismLevel, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, NativeObjectCache Cache) + : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries), + BackendThreadPool(ThinLTOParallelismLevel), + AddStream(std::move(AddStream)), Cache(std::move(Cache)) {} + + Error runThinLTOBackendThread( + AddStreamFn AddStream, NativeObjectCache Cache, unsigned Task, + BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + const GVSummaryMapTy &DefinedGlobals, + MapVector<StringRef, BitcodeModule> &ModuleMap) { + auto RunThinBackend = [&](AddStreamFn AddStream) { + LTOLLVMContext BackendContext(Conf); + Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext); + if (!MOrErr) + return MOrErr.takeError(); + + return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, + ImportList, DefinedGlobals, ModuleMap); + }; + + auto ModuleID = BM.getModuleIdentifier(); + + if (!Cache || !CombinedIndex.modulePaths().count(ModuleID) || + all_of(CombinedIndex.getModuleHash(ModuleID), + [](uint32_t V) { return V == 0; })) + // Cache disabled or no entry for this module in the combined index or + // no module hash. + return RunThinBackend(AddStream); + + SmallString<40> Key; + // The module may be cached, this helps handling it. + computeCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList, ExportList, + ResolvedODR, DefinedGlobals); + if (AddStreamFn CacheAddStream = Cache(Task, Key)) + return RunThinBackend(CacheAddStream); + + return Error::success(); + } + + Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + MapVector<StringRef, BitcodeModule> &ModuleMap) override { + StringRef ModulePath = BM.getModuleIdentifier(); + assert(ModuleToDefinedGVSummaries.count(ModulePath)); + const GVSummaryMapTy &DefinedGlobals = + ModuleToDefinedGVSummaries.find(ModulePath)->second; + BackendThreadPool.async( + [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> + &ResolvedODR, + const GVSummaryMapTy &DefinedGlobals, + MapVector<StringRef, BitcodeModule> &ModuleMap) { + Error E = runThinLTOBackendThread( + AddStream, Cache, Task, BM, CombinedIndex, ImportList, + ExportList, ResolvedODR, DefinedGlobals, ModuleMap); + if (E) { + std::unique_lock<std::mutex> L(ErrMu); + if (Err) + Err = joinErrors(std::move(*Err), std::move(E)); + else + Err = std::move(E); + } + }, + BM, std::ref(CombinedIndex), std::ref(ImportList), + std::ref(ExportList), std::ref(ResolvedODR), std::ref(DefinedGlobals), + std::ref(ModuleMap)); + return Error::success(); + } + + Error wait() override { + BackendThreadPool.wait(); + if (Err) + return std::move(*Err); + else + return Error::success(); + } +}; +} // end anonymous namespace + +ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) { + return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, NativeObjectCache Cache) { + return llvm::make_unique<InProcessThinBackend>( + Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries, + AddStream, Cache); + }; +} + +// Given the original \p Path to an output file, replace any path +// prefix matching \p OldPrefix with \p NewPrefix. Also, create the +// resulting directory if it does not yet exist. +std::string lto::getThinLTOOutputFile(const std::string &Path, + const std::string &OldPrefix, + const std::string &NewPrefix) { + if (OldPrefix.empty() && NewPrefix.empty()) + return Path; + SmallString<128> NewPath(Path); + llvm::sys::path::replace_path_prefix(NewPath, OldPrefix, NewPrefix); + StringRef ParentPath = llvm::sys::path::parent_path(NewPath.str()); + if (!ParentPath.empty()) { + // Make sure the new directory exists, creating it if necessary. + if (std::error_code EC = llvm::sys::fs::create_directories(ParentPath)) + llvm::errs() << "warning: could not create directory '" << ParentPath + << "': " << EC.message() << '\n'; + } + return NewPath.str(); +} + +namespace { +class WriteIndexesThinBackend : public ThinBackendProc { + std::string OldPrefix, NewPrefix; + bool ShouldEmitImportsFiles; + + std::string LinkedObjectsFileName; + std::unique_ptr<llvm::raw_fd_ostream> LinkedObjectsFile; + +public: + WriteIndexesThinBackend( + Config &Conf, ModuleSummaryIndex &CombinedIndex, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles, + std::string LinkedObjectsFileName) + : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries), + OldPrefix(OldPrefix), NewPrefix(NewPrefix), + ShouldEmitImportsFiles(ShouldEmitImportsFiles), + LinkedObjectsFileName(LinkedObjectsFileName) {} + + Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + MapVector<StringRef, BitcodeModule> &ModuleMap) override { + StringRef ModulePath = BM.getModuleIdentifier(); + std::string NewModulePath = + getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix); + + std::error_code EC; + if (!LinkedObjectsFileName.empty()) { + if (!LinkedObjectsFile) { + LinkedObjectsFile = llvm::make_unique<raw_fd_ostream>( + LinkedObjectsFileName, EC, sys::fs::OpenFlags::F_None); + if (EC) + return errorCodeToError(EC); + } + *LinkedObjectsFile << NewModulePath << '\n'; + } + + std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex; + gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, + ImportList, ModuleToSummariesForIndex); + + raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, + sys::fs::OpenFlags::F_None); + if (EC) + return errorCodeToError(EC); + WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex); + + if (ShouldEmitImportsFiles) + return errorCodeToError( + EmitImportsFiles(ModulePath, NewModulePath + ".imports", ImportList)); + return Error::success(); + } + + Error wait() override { return Error::success(); } +}; +} // end anonymous namespace + +ThinBackend lto::createWriteIndexesThinBackend(std::string OldPrefix, + std::string NewPrefix, + bool ShouldEmitImportsFiles, + std::string LinkedObjectsFile) { + return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, NativeObjectCache Cache) { + return llvm::make_unique<WriteIndexesThinBackend>( + Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix, + ShouldEmitImportsFiles, LinkedObjectsFile); + }; +} + +Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, + bool HasRegularLTO) { + if (ThinLTO.ModuleMap.empty()) + return Error::success(); + + if (Conf.CombinedIndexHook && !Conf.CombinedIndexHook(ThinLTO.CombinedIndex)) + return Error::success(); + + // Collect for each module the list of function it defines (GUID -> + // Summary). + StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>> + ModuleToDefinedGVSummaries(ThinLTO.ModuleMap.size()); + ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule( + ModuleToDefinedGVSummaries); + // Create entries for any modules that didn't have any GV summaries + // (either they didn't have any GVs to start with, or we suppressed + // generation of the summaries because they e.g. had inline assembly + // uses that couldn't be promoted/renamed on export). This is so + // InProcessThinBackend::start can still launch a backend thread, which + // is passed the map of summaries for the module, without any special + // handling for this case. + for (auto &Mod : ThinLTO.ModuleMap) + if (!ModuleToDefinedGVSummaries.count(Mod.first)) + ModuleToDefinedGVSummaries.try_emplace(Mod.first); + + // Compute "dead" symbols, we don't want to import/export these! + DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; + for (auto &Res : GlobalResolutions) { + if (Res.second.VisibleOutsideThinLTO && + // IRName will be defined if we have seen the prevailing copy of + // this value. If not, no need to preserve any ThinLTO copies. + !Res.second.IRName.empty()) + GUIDPreservedSymbols.insert(GlobalValue::getGUID(Res.second.IRName)); + } + + auto DeadSymbols = + computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); + + StringMap<FunctionImporter::ImportMapTy> ImportLists( + ThinLTO.ModuleMap.size()); + StringMap<FunctionImporter::ExportSetTy> ExportLists( + ThinLTO.ModuleMap.size()); + StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; + + if (Conf.OptLevel > 0) { + ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, + ImportLists, ExportLists, &DeadSymbols); + + std::set<GlobalValue::GUID> ExportedGUIDs; + for (auto &Res : GlobalResolutions) { + // First check if the symbol was flagged as having external references. + if (Res.second.Partition != GlobalResolution::External) + continue; + // IRName will be defined if we have seen the prevailing copy of + // this value. If not, no need to mark as exported from a ThinLTO + // partition (and we can't get the GUID). + if (Res.second.IRName.empty()) + continue; + auto GUID = GlobalValue::getGUID(Res.second.IRName); + // Mark exported unless index-based analysis determined it to be dead. + if (!DeadSymbols.count(GUID)) + ExportedGUIDs.insert(GlobalValue::getGUID(Res.second.IRName)); + } + + auto isPrevailing = [&](GlobalValue::GUID GUID, + const GlobalValueSummary *S) { + return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); + }; + auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { + const auto &ExportList = ExportLists.find(ModuleIdentifier); + return (ExportList != ExportLists.end() && + ExportList->second.count(GUID)) || + ExportedGUIDs.count(GUID); + }; + thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported); + + auto recordNewLinkage = [&](StringRef ModuleIdentifier, + GlobalValue::GUID GUID, + GlobalValue::LinkageTypes NewLinkage) { + ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; + }; + + thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing, + recordNewLinkage); + } + + std::unique_ptr<ThinBackendProc> BackendProc = + ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, + AddStream, Cache); + + // Task numbers start at ParallelCodeGenParallelismLevel if an LTO + // module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1 + // are reserved for parallel code generation partitions. + unsigned Task = + HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0; + for (auto &Mod : ThinLTO.ModuleMap) { + if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], + ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap)) + return E; + ++Task; + } + + return BackendProc->wait(); } diff --git a/contrib/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm/lib/LTO/LTOBackend.cpp new file mode 100644 index 0000000..809db80 --- /dev/null +++ b/contrib/llvm/lib/LTO/LTOBackend.cpp @@ -0,0 +1,375 @@ +//===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the "backend" phase of LTO, i.e. it performs +// optimization and code generation on a loaded module. It is generally used +// internally by the LTO class but can also be used independently, for example +// to implement a standalone ThinLTO backend. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/LTOBackend.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/LTO/LTO.h" +#include "llvm/LTO/legacy/UpdateCompilerUsed.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Transforms/Utils/SplitModule.h" + +using namespace llvm; +using namespace lto; + +LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { + errs() << "failed to open " << Path << ": " << Msg << '\n'; + errs().flush(); + exit(1); +} + +Error Config::addSaveTemps(std::string OutputFileName, + bool UseInputModulePath) { + ShouldDiscardValueNames = false; + + std::error_code EC; + ResolutionFile = llvm::make_unique<raw_fd_ostream>( + OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text); + if (EC) + return errorCodeToError(EC); + + auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { + // Keep track of the hook provided by the linker, which also needs to run. + ModuleHookFn LinkerHook = Hook; + Hook = [=](unsigned Task, const Module &M) { + // If the linker's hook returned false, we need to pass that result + // through. + if (LinkerHook && !LinkerHook(Task, M)) + return false; + + std::string PathPrefix; + // If this is the combined module (not a ThinLTO backend compile) or the + // user hasn't requested using the input module's path, emit to a file + // named from the provided OutputFileName with the Task ID appended. + if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { + PathPrefix = OutputFileName + utostr(Task); + } else + PathPrefix = M.getModuleIdentifier(); + std::string Path = PathPrefix + "." + PathSuffix + ".bc"; + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); + // Because -save-temps is a debugging feature, we report the error + // directly and exit. + if (EC) + reportOpenError(Path, EC.message()); + WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false); + return true; + }; + }; + + setHook("0.preopt", PreOptModuleHook); + setHook("1.promote", PostPromoteModuleHook); + setHook("2.internalize", PostInternalizeModuleHook); + setHook("3.import", PostImportModuleHook); + setHook("4.opt", PostOptModuleHook); + setHook("5.precodegen", PreCodeGenModuleHook); + + CombinedIndexHook = [=](const ModuleSummaryIndex &Index) { + std::string Path = OutputFileName + "index.bc"; + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); + // Because -save-temps is a debugging feature, we report the error + // directly and exit. + if (EC) + reportOpenError(Path, EC.message()); + WriteIndexToFile(Index, OS); + return true; + }; + + return Error::success(); +} + +namespace { + +std::unique_ptr<TargetMachine> +createTargetMachine(Config &Conf, StringRef TheTriple, + const Target *TheTarget) { + SubtargetFeatures Features; + Features.getDefaultSubtargetFeatures(Triple(TheTriple)); + for (const std::string &A : Conf.MAttrs) + Features.AddFeature(A); + + return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( + TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel, + Conf.CodeModel, Conf.CGOptLevel)); +} + +static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM, + std::string PipelineDesc, + std::string AAPipelineDesc, + bool DisableVerify) { + PassBuilder PB(TM); + AAManager AA; + + // Parse a custom AA pipeline if asked to. + if (!AAPipelineDesc.empty()) + if (!PB.parseAAPipeline(AA, AAPipelineDesc)) + report_fatal_error("unable to parse AA pipeline description: " + + AAPipelineDesc); + + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + + // Register the AA manager first so that our version is the one used. + FAM.registerPass([&] { return std::move(AA); }); + + // Register all the basic analyses with the managers. + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerFunctionAnalyses(FAM); + PB.registerLoopAnalyses(LAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + + ModulePassManager MPM; + + // Always verify the input. + MPM.addPass(VerifierPass()); + + // Now, add all the passes we've been requested to. + if (!PB.parsePassPipeline(MPM, PipelineDesc)) + report_fatal_error("unable to parse pass pipeline description: " + + PipelineDesc); + + if (!DisableVerify) + MPM.addPass(VerifierPass()); + MPM.run(Mod, MAM); +} + +static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, + bool IsThinLTO) { + legacy::PassManager passes; + passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + + PassManagerBuilder PMB; + PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())); + PMB.Inliner = createFunctionInliningPass(); + // Unconditionally verify input since it is not verified before this + // point and has unknown origin. + PMB.VerifyInput = true; + PMB.VerifyOutput = !Conf.DisableVerify; + PMB.LoopVectorize = true; + PMB.SLPVectorize = true; + PMB.OptLevel = Conf.OptLevel; + PMB.PGOSampleUse = Conf.SampleProfile; + if (IsThinLTO) + PMB.populateThinLTOPassManager(passes); + else + PMB.populateLTOPassManager(passes); + passes.run(Mod); +} + +bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, + bool IsThinLTO) { + if (Conf.OptPipeline.empty()) + runOldPMPasses(Conf, Mod, TM, IsThinLTO); + else + runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, + Conf.DisableVerify); + return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); +} + +void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream, + unsigned Task, Module &Mod) { + if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) + return; + + auto Stream = AddStream(Task); + legacy::PassManager CodeGenPasses; + if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, + TargetMachine::CGFT_ObjectFile)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(Mod); +} + +void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream, + unsigned ParallelCodeGenParallelismLevel, + std::unique_ptr<Module> Mod) { + ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel); + unsigned ThreadCount = 0; + const Target *T = &TM->getTarget(); + + SplitModule( + std::move(Mod), ParallelCodeGenParallelismLevel, + [&](std::unique_ptr<Module> MPart) { + // We want to clone the module in a new context to multi-thread the + // codegen. We do it by serializing partition modules to bitcode + // (while still on the main thread, in order to avoid data races) and + // spinning up new threads which deserialize the partitions into + // separate contexts. + // FIXME: Provide a more direct way to do this in LLVM. + SmallString<0> BC; + raw_svector_ostream BCOS(BC); + WriteBitcodeToFile(MPart.get(), BCOS); + + // Enqueue the task + CodegenThreadPool.async( + [&](const SmallString<0> &BC, unsigned ThreadId) { + LTOLLVMContext Ctx(C); + Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( + MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"), + Ctx); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); + + std::unique_ptr<TargetMachine> TM = + createTargetMachine(C, MPartInCtx->getTargetTriple(), T); + + codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx); + }, + // Pass BC using std::move to ensure that it get moved rather than + // copied into the thread's context. + std::move(BC), ThreadCount++); + }, + false); + + // Because the inner lambda (which runs in a worker thread) captures our local + // variables, we need to wait for the worker threads to terminate before we + // can leave the function scope. + CodegenThreadPool.wait(); +} + +Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) { + if (!C.OverrideTriple.empty()) + Mod.setTargetTriple(C.OverrideTriple); + else if (Mod.getTargetTriple().empty()) + Mod.setTargetTriple(C.DefaultTriple); + + std::string Msg; + const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg); + if (!T) + return make_error<StringError>(Msg, inconvertibleErrorCode()); + return T; +} + +} + +static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) { + // Collect the list of undefined symbols used in asm and update + // llvm.compiler.used to prevent optimization to drop these from the output. + StringSet<> AsmUndefinedRefs; + ModuleSymbolTable::CollectAsmSymbols( + Triple(Mod.getTargetTriple()), Mod.getModuleInlineAsm(), + [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { + if (Flags & object::BasicSymbolRef::SF_Undefined) + AsmUndefinedRefs.insert(Name); + }); + updateCompilerUsed(Mod, TM, AsmUndefinedRefs); +} + +Error lto::backend(Config &C, AddStreamFn AddStream, + unsigned ParallelCodeGenParallelismLevel, + std::unique_ptr<Module> Mod) { + Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod); + if (!TOrErr) + return TOrErr.takeError(); + + std::unique_ptr<TargetMachine> TM = + createTargetMachine(C, Mod->getTargetTriple(), *TOrErr); + + handleAsmUndefinedRefs(*Mod, *TM); + + if (!C.CodeGenOnly) + if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false)) + return Error::success(); + + if (ParallelCodeGenParallelismLevel == 1) { + codegen(C, TM.get(), AddStream, 0, *Mod); + } else { + splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel, + std::move(Mod)); + } + return Error::success(); +} + +Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, + Module &Mod, ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const GVSummaryMapTy &DefinedGlobals, + MapVector<StringRef, BitcodeModule> &ModuleMap) { + Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod); + if (!TOrErr) + return TOrErr.takeError(); + + std::unique_ptr<TargetMachine> TM = + createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr); + + handleAsmUndefinedRefs(Mod, *TM); + + if (Conf.CodeGenOnly) { + codegen(Conf, TM.get(), AddStream, Task, Mod); + return Error::success(); + } + + if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) + return Error::success(); + + renameModuleForThinLTO(Mod, CombinedIndex); + + thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals); + + if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) + return Error::success(); + + if (!DefinedGlobals.empty()) + thinLTOInternalizeModule(Mod, DefinedGlobals); + + if (Conf.PostInternalizeModuleHook && + !Conf.PostInternalizeModuleHook(Task, Mod)) + return Error::success(); + + auto ModuleLoader = [&](StringRef Identifier) { + assert(Mod.getContext().isODRUniquingDebugTypes() && + "ODR Type uniquing should be enabled on the context"); + auto I = ModuleMap.find(Identifier); + assert(I != ModuleMap.end()); + return I->second.getLazyModule(Mod.getContext(), + /*ShouldLazyLoadMetadata=*/true, + /*IsImporting*/ true); + }; + + FunctionImporter Importer(CombinedIndex, ModuleLoader); + if (Error Err = Importer.importFunctions(Mod, ImportList).takeError()) + return Err; + + if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) + return Error::success(); + + if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true)) + return Error::success(); + + codegen(Conf, TM.get(), AddStream, Task, Mod); + return Error::success(); +} diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp index 1da2d18..6af31e6 100644 --- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -19,7 +19,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CodeGen/ParallelCG.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/Config/config.h" @@ -49,6 +49,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -58,6 +59,7 @@ #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/ObjCARC.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" #include <system_error> using namespace llvm; @@ -89,6 +91,16 @@ cl::opt<bool> LTOStripInvalidDebugInfo( cl::init(false), #endif cl::Hidden); + +cl::opt<std::string> + LTORemarksFilename("lto-pass-remarks-output", + cl::desc("Output filename for pass remarks"), + cl::value_desc("filename")); + +cl::opt<bool> LTOPassRemarksWithHotness( + "lto-pass-remarks-with-hotness", + cl::desc("With PGO, include profile count in optimization remarks"), + cl::Hidden); } LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context) @@ -130,15 +142,18 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeCFGSimplifyPassPass(R); } +void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) { + const std::vector<StringRef> &undefs = Mod->getAsmUndefinedRefs(); + for (int i = 0, e = undefs.size(); i != e; ++i) + AsmUndefinedRefs[undefs[i]] = 1; +} + bool LTOCodeGenerator::addModule(LTOModule *Mod) { assert(&Mod->getModule().getContext() == &Context && "Expected module in same context"); bool ret = TheLinker->linkInModule(Mod->takeModule()); - - const std::vector<const char *> &undefs = Mod->getAsmUndefinedRefs(); - for (int i = 0, e = undefs.size(); i != e; ++i) - AsmUndefinedRefs[undefs[i]] = 1; + setAsmUndefinedRefs(Mod); // We've just changed the input, so let's make sure we verify it. HasVerifiedInput = false; @@ -154,10 +169,7 @@ void LTOCodeGenerator::setModule(std::unique_ptr<LTOModule> Mod) { MergedModule = Mod->takeModule(); TheLinker = make_unique<Linker>(*MergedModule); - - const std::vector<const char*> &Undefs = Mod->getAsmUndefinedRefs(); - for (int I = 0, E = Undefs.size(); I != E; ++I) - AsmUndefinedRefs[Undefs[I]] = 1; + setAsmUndefinedRefs(&*Mod); // We've just changed the input, so let's make sure we verify it. HasVerifiedInput = false; @@ -185,20 +197,21 @@ void LTOCodeGenerator::setOptLevel(unsigned Level) { switch (OptLevel) { case 0: CGOptLevel = CodeGenOpt::None; - break; + return; case 1: CGOptLevel = CodeGenOpt::Less; - break; + return; case 2: CGOptLevel = CodeGenOpt::Default; - break; + return; case 3: CGOptLevel = CodeGenOpt::Aggressive; - break; + return; } + llvm_unreachable("Unknown optimization level!"); } -bool LTOCodeGenerator::writeMergedModules(const char *Path) { +bool LTOCodeGenerator::writeMergedModules(StringRef Path) { if (!determineTarget()) return false; @@ -239,7 +252,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) { SmallString<128> Filename; int FD; - const char *Extension = + StringRef Extension (FileType == TargetMachine::CGFT_AssemblyFile ? "s" : "o"); std::error_code EC = @@ -250,11 +263,12 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) { } // generate object file - tool_output_file objFile(Filename.c_str(), FD); + tool_output_file objFile(Filename, FD); bool genResult = compileOptimized(&objFile.os()); objFile.os().close(); if (objFile.os().has_error()) { + emitError((Twine("could not write object file: ") + Filename).str()); objFile.os().clear_error(); sys::fs::remove(Twine(Filename)); return false; @@ -363,32 +377,19 @@ std::unique_ptr<TargetMachine> LTOCodeGenerator::createTargetMachine() { void LTOCodeGenerator::preserveDiscardableGVs( Module &TheModule, llvm::function_ref<bool(const GlobalValue &)> mustPreserveGV) { - SetVector<Constant *> UsedValuesSet; - if (GlobalVariable *LLVMUsed = - TheModule.getGlobalVariable("llvm.compiler.used")) { - ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); - for (auto &V : Inits->operands()) - UsedValuesSet.insert(cast<Constant>(&V)); - LLVMUsed->eraseFromParent(); - } - llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(TheModule.getContext()); + std::vector<GlobalValue *> Used; auto mayPreserveGlobal = [&](GlobalValue &GV) { - if (!GV.isDiscardableIfUnused() || GV.isDeclaration()) + if (!GV.isDiscardableIfUnused() || GV.isDeclaration() || + !mustPreserveGV(GV)) return; - if (!mustPreserveGV(GV)) - return; - if (GV.hasAvailableExternallyLinkage()) { - emitWarning( + if (GV.hasAvailableExternallyLinkage()) + return emitWarning( (Twine("Linker asked to preserve available_externally global: '") + GV.getName() + "'").str()); - return; - } - if (GV.hasInternalLinkage()) { - emitWarning((Twine("Linker asked to preserve internal global: '") + + if (GV.hasInternalLinkage()) + return emitWarning((Twine("Linker asked to preserve internal global: '") + GV.getName() + "'").str()); - return; - } - UsedValuesSet.insert(ConstantExpr::getBitCast(&GV, i8PTy)); + Used.push_back(&GV); }; for (auto &GV : TheModule) mayPreserveGlobal(GV); @@ -397,15 +398,10 @@ void LTOCodeGenerator::preserveDiscardableGVs( for (auto &GV : TheModule.aliases()) mayPreserveGlobal(GV); - if (UsedValuesSet.empty()) + if (Used.empty()) return; - llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, UsedValuesSet.size()); - auto *LLVMUsed = new llvm::GlobalVariable( - TheModule, ATy, false, llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, UsedValuesSet.getArrayRef()), - "llvm.compiler.used"); - LLVMUsed->setSection("llvm.metadata"); + appendToCompilerUsed(TheModule, Used); } void LTOCodeGenerator::applyScopeRestrictions() { @@ -414,6 +410,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { // Declare a callback for the internalize pass that will ask for every // candidate GlobalValue if it can be internalized or not. + Mangler Mang; SmallString<64> MangledName; auto mustPreserveGV = [&](const GlobalValue &GV) -> bool { // Unnamed globals can't be mangled, but they can't be preserved either. @@ -425,8 +422,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { // underscore. MangledName.clear(); MangledName.reserve(GV.getName().size() + 1); - Mangler::getNameWithPrefix(MangledName, GV.getName(), - MergedModule->getDataLayout()); + Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false); return MustPreserveSymbols.count(MangledName); }; @@ -510,6 +506,33 @@ void LTOCodeGenerator::verifyMergedModuleOnce() { report_fatal_error("Broken module found, compilation aborted!"); } +bool LTOCodeGenerator::setupOptimizationRemarks() { + if (LTORemarksFilename != "") { + std::error_code EC; + DiagnosticOutputFile = llvm::make_unique<tool_output_file>( + LTORemarksFilename, EC, sys::fs::F_None); + if (EC) { + emitError(EC.message()); + return false; + } + Context.setDiagnosticsOutputFile( + llvm::make_unique<yaml::Output>(DiagnosticOutputFile->os())); + } + + if (LTOPassRemarksWithHotness) + Context.setDiagnosticHotnessRequested(true); + + return true; +} + +void LTOCodeGenerator::finishOptimizationRemarks() { + if (DiagnosticOutputFile) { + DiagnosticOutputFile->keep(); + // FIXME: LTOCodeGenerator dtor is not invoked on Darwin + DiagnosticOutputFile->os().flush(); + } +} + /// Optimize merged modules using various IPO passes bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, bool DisableGVNLoadPRE, @@ -517,6 +540,9 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, if (!this->determineTarget()) return false; + if (!setupOptimizationRemarks()) + return false; + // We always run the verifier once on the merged module, the `DisableVerify` // parameter only applies to subsequent verify. verifyMergedModuleOnce(); @@ -585,12 +611,14 @@ bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> Out) { if (llvm::AreStatisticsEnabled()) llvm::PrintStatistics(); + finishOptimizationRemarks(); + return true; } /// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging /// LTO problems. -void LTOCodeGenerator::setCodeGenDebugOptions(const char *Options) { +void LTOCodeGenerator::setCodeGenDebugOptions(StringRef Options) { for (std::pair<StringRef, StringRef> o = getToken(Options); !o.first.empty(); o = getToken(o.second)) CodegenOptions.push_back(o.first); diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp index a1d6f93..89aeb80 100644 --- a/contrib/llvm/lib/LTO/LTOModule.cpp +++ b/contrib/llvm/lib/LTO/LTOModule.cpp @@ -14,12 +14,11 @@ #include "llvm/LTO/legacy/LTOModule.h" #include "llvm/ADT/Triple.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" @@ -49,9 +48,11 @@ using namespace llvm; using namespace llvm::object; -LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj, +LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef, llvm::TargetMachine *TM) - : IRFile(std::move(Obj)), _target(TM) {} + : Mod(std::move(M)), MBRef(MBRef), _target(TM) { + SymTab.addModule(Mod.get()); +} LTOModule::~LTOModule() {} @@ -63,7 +64,7 @@ bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) { return bool(BCData); } -bool LTOModule::isBitcodeFile(const char *Path) { +bool LTOModule::isBitcodeFile(StringRef Path) { ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = MemoryBuffer::getFile(Path); if (!BufferOrErr) @@ -77,13 +78,12 @@ bool LTOModule::isBitcodeFile(const char *Path) { bool LTOModule::isThinLTO() { // Right now the detection is only based on the summary presence. We may want // to add a dedicated flag at some point. - return hasGlobalValueSummary(IRFile->getMemoryBufferRef(), - [](const DiagnosticInfo &DI) { - DiagnosticPrinterRawOStream DP(errs()); - DI.print(DP); - errs() << '\n'; - return; - }); + Expected<bool> Result = hasGlobalValueSummary(MBRef); + if (!Result) { + logAllUnhandledErrors(Result.takeError(), errs(), ""); + return false; + } + return *Result; } bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, @@ -93,8 +93,11 @@ bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, if (!BCOrErr) return false; LLVMContext Context; - std::string Triple = getBitcodeTargetTriple(*BCOrErr, Context); - return StringRef(Triple).startswith(TriplePrefix); + ErrorOr<std::string> TripleOrErr = + expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(*BCOrErr)); + if (!TripleOrErr) + return false; + return StringRef(*TripleOrErr).startswith(TriplePrefix); } std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { @@ -103,11 +106,15 @@ std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { if (!BCOrErr) return ""; LLVMContext Context; - return getBitcodeProducerString(*BCOrErr, Context); + ErrorOr<std::string> ProducerOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeProducerString(*BCOrErr)); + if (!ProducerOrErr) + return ""; + return *ProducerOrErr; } ErrorOr<std::unique_ptr<LTOModule>> -LTOModule::createFromFile(LLVMContext &Context, const char *path, +LTOModule::createFromFile(LLVMContext &Context, StringRef path, const TargetOptions &options) { ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = MemoryBuffer::getFile(path); @@ -121,15 +128,15 @@ LTOModule::createFromFile(LLVMContext &Context, const char *path, } ErrorOr<std::unique_ptr<LTOModule>> -LTOModule::createFromOpenFile(LLVMContext &Context, int fd, const char *path, +LTOModule::createFromOpenFile(LLVMContext &Context, int fd, StringRef path, size_t size, const TargetOptions &options) { return createFromOpenFileSlice(Context, fd, path, size, 0, options); } ErrorOr<std::unique_ptr<LTOModule>> -LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, - const char *path, size_t map_size, - off_t offset, const TargetOptions &options) { +LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path, + size_t map_size, off_t offset, + const TargetOptions &options) { ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); if (std::error_code EC = BufferOrErr.getError()) { @@ -179,20 +186,14 @@ parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, if (!ShouldBeLazy) { // Parse the full file. - ErrorOr<std::unique_ptr<Module>> M = parseBitcodeFile(*MBOrErr, Context); - if (std::error_code EC = M.getError()) - return EC; - return std::move(*M); + return expectedToErrorOrAndEmitErrors(Context, + parseBitcodeFile(*MBOrErr, Context)); } // Parse lazily. - std::unique_ptr<MemoryBuffer> LightweightBuf = - MemoryBuffer::getMemBuffer(*MBOrErr, false); - ErrorOr<std::unique_ptr<Module>> M = getLazyBitcodeModule( - std::move(LightweightBuf), Context, true /*ShouldLazyLoadMetadata*/); - if (std::error_code EC = M.getError()) - return EC; - return std::move(*M); + return expectedToErrorOrAndEmitErrors( + Context, + getLazyBitcodeModule(*MBOrErr, Context, true /*ShouldLazyLoadMetadata*/)); } ErrorOr<std::unique_ptr<LTOModule>> @@ -232,12 +233,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options, TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, options, None); - M->setDataLayout(target->createDataLayout()); - - std::unique_ptr<object::IRObjectFile> IRObj( - new object::IRObjectFile(Buffer, std::move(M))); - std::unique_ptr<LTOModule> Ret(new LTOModule(std::move(IRObj), target)); + std::unique_ptr<LTOModule> Ret(new LTOModule(std::move(M), Buffer, target)); Ret->parseSymbols(); Ret->parseMetadata(); @@ -281,7 +278,7 @@ void LTOModule::addObjCClass(const GlobalVariable *clgv) { _undefines.insert(std::make_pair(superclassName, NameAndAttributes())); if (IterBool.second) { NameAndAttributes &info = IterBool.first->second; - info.name = IterBool.first->first().data(); + info.name = IterBool.first->first(); info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; info.isFunction = false; info.symbol = clgv; @@ -294,7 +291,7 @@ void LTOModule::addObjCClass(const GlobalVariable *clgv) { auto Iter = _defines.insert(className).first; NameAndAttributes info; - info.name = Iter->first().data(); + info.name = Iter->first(); info.attributes = LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT; info.isFunction = false; @@ -320,7 +317,7 @@ void LTOModule::addObjCCategory(const GlobalVariable *clgv) { return; NameAndAttributes &info = IterBool.first->second; - info.name = IterBool.first->first().data(); + info.name = IterBool.first->first(); info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; info.isFunction = false; info.symbol = clgv; @@ -339,24 +336,25 @@ void LTOModule::addObjCClassRef(const GlobalVariable *clgv) { return; NameAndAttributes &info = IterBool.first->second; - info.name = IterBool.first->first().data(); + info.name = IterBool.first->first(); info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; info.isFunction = false; info.symbol = clgv; } -void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) { +void LTOModule::addDefinedDataSymbol(ModuleSymbolTable::Symbol Sym) { SmallString<64> Buffer; { raw_svector_ostream OS(Buffer); - Sym.printName(OS); + SymTab.printSymbolName(OS, Sym); + Buffer.c_str(); } - const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); - addDefinedDataSymbol(Buffer.c_str(), V); + const GlobalValue *V = Sym.get<GlobalValue *>(); + addDefinedDataSymbol(Buffer, V); } -void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) { +void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) { // Add to list of defined symbols. addDefinedSymbol(Name, v, false); @@ -406,24 +404,24 @@ void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) { } } -void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) { +void LTOModule::addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym) { SmallString<64> Buffer; { raw_svector_ostream OS(Buffer); - Sym.printName(OS); + SymTab.printSymbolName(OS, Sym); + Buffer.c_str(); } - const Function *F = - cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl())); - addDefinedFunctionSymbol(Buffer.c_str(), F); + const Function *F = cast<Function>(Sym.get<GlobalValue *>()); + addDefinedFunctionSymbol(Buffer, F); } -void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) { +void LTOModule::addDefinedFunctionSymbol(StringRef Name, const Function *F) { // add to list of defined symbols addDefinedSymbol(Name, F, true); } -void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def, +void LTOModule::addDefinedSymbol(StringRef Name, const GlobalValue *def, bool isFunction) { // set alignment part log2() can have rounding errors uint32_t align = def->getAlignment(); @@ -472,8 +470,8 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def, // fill information structure NameAndAttributes info; StringRef NameRef = Iter->first(); - info.name = NameRef.data(); - assert(info.name[NameRef.size()] == '\0'); + info.name = NameRef; + assert(NameRef.data()[NameRef.size()] == '\0'); info.attributes = attr; info.isFunction = isFunction; info.symbol = def; @@ -484,7 +482,7 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def, /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the /// defined list. -void LTOModule::addAsmGlobalSymbol(const char *name, +void LTOModule::addAsmGlobalSymbol(StringRef name, lto_symbol_attributes scope) { auto IterBool = _defines.insert(name); @@ -492,7 +490,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name, if (!IterBool.second) return; - NameAndAttributes &info = _undefines[IterBool.first->first().data()]; + NameAndAttributes &info = _undefines[IterBool.first->first()]; if (info.symbol == nullptr) { // FIXME: This is trying to take care of module ASM like this: @@ -504,7 +502,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name, // much. // fill information structure - info.name = IterBool.first->first().data(); + info.name = IterBool.first->first(); info.attributes = LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope; info.isFunction = false; @@ -526,10 +524,10 @@ void LTOModule::addAsmGlobalSymbol(const char *name, /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the /// undefined list. -void LTOModule::addAsmGlobalSymbolUndef(const char *name) { +void LTOModule::addAsmGlobalSymbolUndef(StringRef name) { auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes())); - _asm_undefines.push_back(IterBool.first->first().data()); + _asm_undefines.push_back(IterBool.first->first()); // we already have the symbol if (!IterBool.second) @@ -538,19 +536,20 @@ void LTOModule::addAsmGlobalSymbolUndef(const char *name) { uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED; attr |= LTO_SYMBOL_SCOPE_DEFAULT; NameAndAttributes &info = IterBool.first->second; - info.name = IterBool.first->first().data(); + info.name = IterBool.first->first(); info.attributes = attr; info.isFunction = false; info.symbol = nullptr; } /// Add a symbol which isn't defined just yet to a list to be resolved later. -void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, +void LTOModule::addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym, bool isFunc) { SmallString<64> name; { raw_svector_ostream OS(name); - Sym.printName(OS); + SymTab.printSymbolName(OS, Sym); + name.c_str(); } auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes())); @@ -561,9 +560,9 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, NameAndAttributes &info = IterBool.first->second; - info.name = IterBool.first->first().data(); + info.name = IterBool.first->first(); - const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); + const GlobalValue *decl = Sym.dyn_cast<GlobalValue *>(); if (decl->hasExternalWeakLinkage()) info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF; @@ -575,9 +574,9 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, } void LTOModule::parseSymbols() { - for (auto &Sym : IRFile->symbols()) { - const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); - uint32_t Flags = Sym.getFlags(); + for (auto Sym : SymTab.symbols()) { + auto *GV = Sym.dyn_cast<GlobalValue *>(); + uint32_t Flags = SymTab.getSymbolFlags(Sym); if (Flags & object::BasicSymbolRef::SF_FormatSpecific) continue; @@ -587,9 +586,10 @@ void LTOModule::parseSymbols() { SmallString<64> Buffer; { raw_svector_ostream OS(Buffer); - Sym.printName(OS); + SymTab.printSymbolName(OS, Sym); + Buffer.c_str(); } - const char *Name = Buffer.c_str(); + StringRef Name(Buffer); if (IsUndefined) addAsmGlobalSymbolUndef(Name); @@ -648,12 +648,10 @@ void LTOModule::parseMetadata() { } // Globals - Mangler Mang; for (const NameAndAttributes &Sym : _symbols) { if (!Sym.symbol) continue; - _target->getObjFileLowering()->emitLinkerFlagsForGlobal(OS, Sym.symbol, - Mang); + _target->getObjFileLowering()->emitLinkerFlagsForGlobal(OS, Sym.symbol); } // Add other interesting metadata here. diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index bfb0980..40537e4 100644 --- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -21,10 +21,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitcode/BitcodeWriterPass.h" -#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" @@ -38,10 +40,13 @@ #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" #include "llvm/Support/Path.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionImport.h" @@ -59,17 +64,34 @@ using namespace llvm; namespace llvm { // Flags -discard-value-names, defined in LTOCodeGenerator.cpp extern cl::opt<bool> LTODiscardValueNames; +extern cl::opt<std::string> LTORemarksFilename; +extern cl::opt<bool> LTOPassRemarksWithHotness; } namespace { -static cl::opt<int> ThreadCount("threads", - cl::init(std::thread::hardware_concurrency())); +static cl::opt<int> + ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency())); -static void diagnosticHandler(const DiagnosticInfo &DI) { - DiagnosticPrinterRawOStream DP(errs()); - DI.print(DP); - errs() << '\n'; +Expected<std::unique_ptr<tool_output_file>> +setupOptimizationRemarks(LLVMContext &Ctx, int Count) { + if (LTOPassRemarksWithHotness) + Ctx.setDiagnosticHotnessRequested(true); + + if (LTORemarksFilename.empty()) + return nullptr; + + std::string FileName = + LTORemarksFilename + ".thin." + llvm::utostr(Count) + ".yaml"; + std::error_code EC; + auto DiagnosticOutputFile = + llvm::make_unique<tool_output_file>(FileName, EC, sys::fs::F_None); + if (EC) + return errorCodeToError(EC); + Ctx.setDiagnosticsOutputFile( + llvm::make_unique<yaml::Output>(DiagnosticOutputFile->os())); + DiagnosticOutputFile->keep(); + return std::move(DiagnosticOutputFile); } // Simple helper to save temporary files for debug. @@ -78,9 +100,9 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir, if (TempDir.empty()) return; // User asked to save temps, let dump the bitcode file after import. - auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix; + std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str(); std::error_code EC; - raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None); + raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); if (EC) report_fatal_error(Twine("Failed to open ") + SaveTempPath + " to save optimized bitcode\n"); @@ -128,13 +150,13 @@ static void computePrevailingCopies( } static StringMap<MemoryBufferRef> -generateModuleMap(const std::vector<MemoryBufferRef> &Modules) { +generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) { StringMap<MemoryBufferRef> ModuleMap; for (auto &ModuleBuffer : Modules) { assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) == ModuleMap.end() && "Expect unique Buffer Identifier"); - ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer; + ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer(); } return ModuleMap; } @@ -144,22 +166,55 @@ static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) { report_fatal_error("renameModuleForThinLTO failed"); } +static std::unique_ptr<Module> +loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context, + bool Lazy, bool IsImporting) { + SMDiagnostic Err; + Expected<std::unique_ptr<Module>> ModuleOrErr = + Lazy + ? getLazyBitcodeModule(Buffer, Context, + /* ShouldLazyLoadMetadata */ true, IsImporting) + : parseBitcodeFile(Buffer, Context); + if (!ModuleOrErr) { + handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { + SMDiagnostic Err = SMDiagnostic(Buffer.getBufferIdentifier(), + SourceMgr::DK_Error, EIB.message()); + Err.print("ThinLTO", errs()); + }); + report_fatal_error("Can't load module, abort."); + } + return std::move(ModuleOrErr.get()); +} + static void crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, StringMap<MemoryBufferRef> &ModuleMap, const FunctionImporter::ImportMapTy &ImportList) { - ModuleLoader Loader(TheModule.getContext(), ModuleMap); + auto Loader = [&](StringRef Identifier) { + return loadModuleFromBuffer(ModuleMap[Identifier], TheModule.getContext(), + /*Lazy=*/true, /*IsImporting*/ true); + }; + FunctionImporter Importer(Index, Loader); - Importer.importFunctions(TheModule, ImportList); + Expected<bool> Result = Importer.importFunctions(TheModule, ImportList); + if (!Result) { + handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) { + SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(), + SourceMgr::DK_Error, EIB.message()); + Err.print("ThinLTO", errs()); + }); + report_fatal_error("importFunctions failed"); + } } -static void optimizeModule(Module &TheModule, TargetMachine &TM) { +static void optimizeModule(Module &TheModule, TargetMachine &TM, + unsigned OptLevel) { // Populate the PassManager PassManagerBuilder PMB; PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); PMB.Inliner = createFunctionInliningPass(); // FIXME: should get it from the bitcode? - PMB.OptLevel = 3; + PMB.OptLevel = OptLevel; PMB.LoopVectorize = true; PMB.SLPVectorize = true; PMB.VerifyInput = true; @@ -229,25 +284,65 @@ public: const FunctionImporter::ExportSetTy &ExportList, const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, const GVSummaryMapTy &DefinedFunctions, - const DenseSet<GlobalValue::GUID> &PreservedSymbols) { + const DenseSet<GlobalValue::GUID> &PreservedSymbols, unsigned OptLevel, + const TargetMachineBuilder &TMBuilder) { if (CachePath.empty()) return; + if (!Index.modulePaths().count(ModuleID)) + // The module does not have an entry, it can't have a hash at all + return; + // Compute the unique hash for this entry // This is based on the current compiler version, the module itself, the // export list, the hash for every single module in the import list, the // list of ResolvedODR for the module, and the list of preserved symbols. + // Include the hash for the current module + auto ModHash = Index.getModuleHash(ModuleID); + + if (all_of(ModHash, [](uint32_t V) { return V == 0; })) + // No hash entry, no caching! + return; + SHA1 Hasher; + // Include the parts of the LTO configuration that affect code generation. + auto AddString = [&](StringRef Str) { + Hasher.update(Str); + Hasher.update(ArrayRef<uint8_t>{0}); + }; + auto AddUnsigned = [&](unsigned I) { + uint8_t Data[4]; + Data[0] = I; + Data[1] = I >> 8; + Data[2] = I >> 16; + Data[3] = I >> 24; + Hasher.update(ArrayRef<uint8_t>{Data, 4}); + }; + // Start with the compiler revision Hasher.update(LLVM_VERSION_STRING); #ifdef HAVE_LLVM_REVISION Hasher.update(LLVM_REVISION); #endif - // Include the hash for the current module - auto ModHash = Index.getModuleHash(ModuleID); + // Hash the optimization level and the target machine settings. + AddString(TMBuilder.MCpu); + // FIXME: Hash more of Options. For now all clients initialize Options from + // command-line flags (which is unsupported in production), but may set + // RelaxELFRelocations. The clang driver can also pass FunctionSections, + // DataSections and DebuggerTuning via command line flags. + AddUnsigned(TMBuilder.Options.RelaxELFRelocations); + AddUnsigned(TMBuilder.Options.FunctionSections); + AddUnsigned(TMBuilder.Options.DataSections); + AddUnsigned((unsigned)TMBuilder.Options.DebuggerTuning); + AddString(TMBuilder.MAttr); + if (TMBuilder.RelocModel) + AddUnsigned(*TMBuilder.RelocModel); + AddUnsigned(TMBuilder.CGOptLevel); + AddUnsigned(OptLevel); + Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); for (auto F : ExportList) // The export list can impact the internalization, be conservative here @@ -288,10 +383,9 @@ public: } // Cache the Produced object file - std::unique_ptr<MemoryBuffer> - write(std::unique_ptr<MemoryBuffer> OutputBuffer) { + void write(const MemoryBuffer &OutputBuffer) { if (EntryPath.empty()) - return OutputBuffer; + return; // Write to a temporary to avoid race condition SmallString<128> TempFilename; @@ -304,7 +398,7 @@ public: } { raw_fd_ostream OS(TempFD, /* ShouldClose */ true); - OS << OutputBuffer->getBuffer(); + OS << OutputBuffer.getBuffer(); } // Rename to final destination (hopefully race condition won't matter here) EC = sys::fs::rename(TempFilename, EntryPath); @@ -314,16 +408,8 @@ public: if (EC) report_fatal_error(Twine("Failed to open ") + EntryPath + " to save cached entry\n"); - OS << OutputBuffer->getBuffer(); + OS << OutputBuffer.getBuffer(); } - auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath); - if (auto EC = ReloadedBufferOrErr.getError()) { - // FIXME diagnose - errs() << "error: can't reload cached file '" << EntryPath - << "': " << EC.message() << "\n"; - return OutputBuffer; - } - return std::move(*ReloadedBufferOrErr); } }; @@ -336,7 +422,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGlobals, const ThinLTOCodeGenerator::CachingOptions &CacheOptions, bool DisableCodeGen, StringRef SaveTempsDir, - unsigned count) { + unsigned OptLevel, unsigned count) { // "Benchmark"-like optimization: single-source case bool SingleModule = (ModuleMap.size() == 1); @@ -368,7 +454,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); } - optimizeModule(TheModule, TM); + optimizeModule(TheModule, TM, OptLevel); saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); @@ -377,8 +463,9 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, SmallVector<char, 128> OutputBuffer; { raw_svector_ostream OS(OutputBuffer); - ModuleSummaryIndexBuilder IndexBuilder(&TheModule); - WriteBitcodeToFile(&TheModule, OS, true, &IndexBuilder.getIndex()); + ProfileSummaryInfo PSI(TheModule); + auto Index = buildModuleSummaryIndex(TheModule, nullptr, nullptr); + WriteBitcodeToFile(&TheModule, OS, true, &Index); } return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer)); } @@ -435,18 +522,27 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder, } // end anonymous namespace void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { - MemoryBufferRef Buffer(Data, Identifier); + ThinLTOBuffer Buffer(Data, Identifier); if (Modules.empty()) { // First module added, so initialize the triple and some options LLVMContext Context; - Triple TheTriple(getBitcodeTargetTriple(Buffer, Context)); + StringRef TripleStr; + ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); + if (TripleOrErr) + TripleStr = *TripleOrErr; + Triple TheTriple(TripleStr); initTMBuilder(TMBuilder, Triple(TheTriple)); } #ifndef NDEBUG else { LLVMContext Context; - assert(TMBuilder.TheTriple.str() == - getBitcodeTargetTriple(Buffer, Context) && + StringRef TripleStr; + ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); + if (TripleOrErr) + TripleStr = *TripleOrErr; + assert(TMBuilder.TheTriple.str() == TripleStr && "ThinLTO modules with different triple not supported"); } #endif @@ -477,6 +573,7 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { SubtargetFeatures Features(MAttr); Features.getDefaultSubtargetFeatures(TheTriple); std::string FeatureStr = Features.getString(); + return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( TheTriple.str(), MCpu, FeatureStr, Options, RelocModel, CodeModel::Default, CGOptLevel)); @@ -490,13 +587,14 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() { std::unique_ptr<ModuleSummaryIndex> CombinedIndex; uint64_t NextModuleId = 0; for (auto &ModuleBuffer : Modules) { - ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create(ModuleBuffer, - diagnosticHandler); - if (std::error_code EC = ObjOrErr.getError()) { + Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = + object::ModuleSummaryIndexObjectFile::create( + ModuleBuffer.getMemBuffer()); + if (!ObjOrErr) { // FIXME diagnose - errs() << "error: can't create ModuleSummaryIndexObjectFile for buffer: " - << EC.message() << "\n"; + logAllUnhandledErrors( + ObjOrErr.takeError(), errs(), + "error: can't create ModuleSummaryIndexObjectFile for buffer: "); return nullptr; } auto Index = (*ObjOrErr)->takeIndex(); @@ -517,15 +615,23 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index) { auto ModuleCount = Index.modulePaths().size(); auto ModuleIdentifier = TheModule.getModuleIdentifier(); + // Collect for each module the list of function it defines (GUID -> Summary). StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries; Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( + PreservedSymbols, Triple(TheModule.getTargetTriple())); + + // Compute "dead" symbols, we don't want to import/export these! + auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + // Generate import/export list StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists); + ExportLists, &DeadSymbols); // Resolve LinkOnce/Weak symbols. StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; @@ -534,6 +640,16 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, thinLTOResolveWeakForLinkerModule( TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); + // Promote the exported values in the index, so that they are promoted + // in the module. + auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { + const auto &ExportList = ExportLists.find(ModuleIdentifier); + return (ExportList != ExportLists.end() && + ExportList->second.count(GUID)) || + GUIDPreservedSymbols.count(GUID); + }; + thinLTOInternalizeAndPromoteInIndex(Index, isExported); + promoteModule(TheModule, Index); } @@ -549,11 +665,18 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( + PreservedSymbols, Triple(TheModule.getTargetTriple())); + + // Compute "dead" symbols, we don't want to import/export these! + auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + // Generate import/export list StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists); + ExportLists, &DeadSymbols); auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); @@ -578,7 +701,7 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule( ExportLists); llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, - ImportLists, + ImportLists[ModulePath], ModuleToSummariesForIndex); } @@ -601,7 +724,7 @@ void ThinLTOCodeGenerator::emitImports(StringRef ModulePath, ExportLists); std::error_code EC; - if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists))) + if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists[ModulePath]))) report_fatal_error(Twine("Failed to open ") + OutputName + " to save imports lists\n"); } @@ -623,11 +746,14 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + // Compute "dead" symbols, we don't want to import/export these! + auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + // Generate import/export list StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists); + ExportLists, &DeadSymbols); auto &ExportList = ExportLists[ModuleIdentifier]; // Be friendly and don't nuke totally the module when the client didn't @@ -654,7 +780,7 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); // Optimize now - optimizeModule(TheModule, *TMBuilder.create()); + optimizeModule(TheModule, *TMBuilder.create(), OptLevel); } /** @@ -665,13 +791,61 @@ std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) { return codegenModule(TheModule, *TMBuilder.create()); } +/// Write out the generated object file, either from CacheEntryPath or from +/// OutputBuffer, preferring hard-link when possible. +/// Returns the path to the generated file in SavedObjectsDirectoryPath. +static std::string writeGeneratedObject(int count, StringRef CacheEntryPath, + StringRef SavedObjectsDirectoryPath, + const MemoryBuffer &OutputBuffer) { + SmallString<128> OutputPath(SavedObjectsDirectoryPath); + llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o"); + OutputPath.c_str(); // Ensure the string is null terminated. + if (sys::fs::exists(OutputPath)) + sys::fs::remove(OutputPath); + + // We don't return a memory buffer to the linker, just a list of files. + if (!CacheEntryPath.empty()) { + // Cache is enabled, hard-link the entry (or copy if hard-link fails). + auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath); + if (!Err) + return OutputPath.str(); + // Hard linking failed, try to copy. + Err = sys::fs::copy_file(CacheEntryPath, OutputPath); + if (!Err) + return OutputPath.str(); + // Copy failed (could be because the CacheEntry was removed from the cache + // in the meantime by another process), fall back and try to write down the + // buffer to the output. + errs() << "error: can't link or copy from cached entry '" << CacheEntryPath + << "' to '" << OutputPath << "'\n"; + } + // No cache entry, just write out the buffer. + std::error_code Err; + raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None); + if (Err) + report_fatal_error("Can't open output '" + OutputPath + "'\n"); + OS << OutputBuffer.getBuffer(); + return OutputPath.str(); +} + // Main entry point for the ThinLTO processing void ThinLTOCodeGenerator::run() { + // Prepare the resulting object vector + assert(ProducedBinaries.empty() && "The generator should not be reused"); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries.resize(Modules.size()); + else { + sys::fs::create_directories(SavedObjectsDirectoryPath); + bool IsDir; + sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); + if (!IsDir) + report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); + ProducedBinaryFiles.resize(Modules.size()); + } + if (CodeGenOnly) { // Perform only parallel codegen and return. ThreadPool Pool; - assert(ProducedBinaries.empty() && "The generator should not be reused"); - ProducedBinaries.resize(Modules.size()); int count = 0; for (auto &ModuleBuffer : Modules) { Pool.async([&](int count) { @@ -679,10 +853,17 @@ void ThinLTOCodeGenerator::run() { Context.setDiscardValueNames(LTODiscardValueNames); // Parse module now - auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); + auto TheModule = + loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false, + /*IsImporting*/ false); // CodeGen - ProducedBinaries[count] = codegen(*TheModule); + auto OutputBuffer = codegen(*TheModule); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries[count] = std::move(OutputBuffer); + else + ProducedBinaryFiles[count] = writeGeneratedObject( + count, "", SavedObjectsDirectoryPath, *OutputBuffer); }, count++); } @@ -703,9 +884,6 @@ void ThinLTOCodeGenerator::run() { WriteIndexToFile(*Index, OS); } - // Prepare the resulting object vector - assert(ProducedBinaries.empty() && "The generator should not be reused"); - ProducedBinaries.resize(Modules.size()); // Prepare the module map. auto ModuleMap = generateModuleMap(Modules); @@ -715,17 +893,20 @@ void ThinLTOCodeGenerator::run() { StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + // Convert the preserved symbols set from string to GUID, this is needed for + // computing the caching hash and the internalization. + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + + // Compute "dead" symbols, we don't want to import/export these! + auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols); + // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists); - - // Convert the preserved symbols set from string to GUID, this is needed for - // computing the caching hash and the internalization. - auto GUIDPreservedSymbols = - computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + ExportLists, &DeadSymbols); // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. @@ -764,8 +945,8 @@ void ThinLTOCodeGenerator::run() { std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); std::sort(ModulesOrdering.begin(), ModulesOrdering.end(), [&](int LeftIndex, int RightIndex) { - auto LSize = Modules[LeftIndex].getBufferSize(); - auto RSize = Modules[RightIndex].getBufferSize(); + auto LSize = Modules[LeftIndex].getBuffer().size(); + auto RSize = Modules[RightIndex].getBuffer().size(); return LSize > RSize; }); @@ -784,17 +965,24 @@ void ThinLTOCodeGenerator::run() { ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, ImportLists[ModuleIdentifier], ExportList, ResolvedODR[ModuleIdentifier], - DefinedFunctions, GUIDPreservedSymbols); + DefinedFunctions, GUIDPreservedSymbols, + OptLevel, TMBuilder); + auto CacheEntryPath = CacheEntry.getEntryPath(); { auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '" - << CacheEntry.getEntryPath() << "' for buffer " << count - << " " << ModuleIdentifier << "\n"); + << CacheEntryPath << "' for buffer " << count << " " + << ModuleIdentifier << "\n"); if (ErrOrBuffer) { // Cache Hit! - ProducedBinaries[count] = std::move(ErrOrBuffer.get()); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries[count] = std::move(ErrOrBuffer.get()); + else + ProducedBinaryFiles[count] = writeGeneratedObject( + count, CacheEntryPath, SavedObjectsDirectoryPath, + *ErrOrBuffer.get()); return; } } @@ -802,9 +990,17 @@ void ThinLTOCodeGenerator::run() { LLVMContext Context; Context.setDiscardValueNames(LTODiscardValueNames); Context.enableDebugTypeODRUniquing(); + auto DiagFileOrErr = setupOptimizationRemarks(Context, count); + if (!DiagFileOrErr) { + errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n"; + report_fatal_error("ThinLTO: Can't get an output file for the " + "remarks"); + } // Parse module now - auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); + auto TheModule = + loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false, + /*IsImporting*/ false); // Save temps: original file. saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); @@ -815,17 +1011,41 @@ void ThinLTOCodeGenerator::run() { *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList, ExportList, GUIDPreservedSymbols, ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, - DisableCodeGen, SaveTempsDir, count); - - OutputBuffer = CacheEntry.write(std::move(OutputBuffer)); - ProducedBinaries[count] = std::move(OutputBuffer); + DisableCodeGen, SaveTempsDir, OptLevel, count); + + // Commit to the cache (if enabled) + CacheEntry.write(*OutputBuffer); + + if (SavedObjectsDirectoryPath.empty()) { + // We need to generated a memory buffer for the linker. + if (!CacheEntryPath.empty()) { + // Cache is enabled, reload from the cache + // We do this to lower memory pressuree: the buffer is on the heap + // and releasing it frees memory that can be used for the next input + // file. The final binary link will read from the VFS cache + // (hopefully!) or from disk if the memory pressure wasn't too high. + auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer(); + if (auto EC = ReloadedBufferOrErr.getError()) { + // On error, keeping the preexisting buffer and printing a + // diagnostic is more friendly than just crashing. + errs() << "error: can't reload cached file '" << CacheEntryPath + << "': " << EC.message() << "\n"; + } else { + OutputBuffer = std::move(*ReloadedBufferOrErr); + } + } + ProducedBinaries[count] = std::move(OutputBuffer); + return; + } + ProducedBinaryFiles[count] = writeGeneratedObject( + count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer); }, IndexCount); } } CachePruning(CacheOptions.Path) - .setPruningInterval(CacheOptions.PruningInterval) - .setEntryExpiration(CacheOptions.Expiration) + .setPruningInterval(std::chrono::seconds(CacheOptions.PruningInterval)) + .setEntryExpiration(std::chrono::seconds(CacheOptions.Expiration)) .setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace) .prune(); diff --git a/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp b/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp index a574db6..b67d9ea 100644 --- a/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp +++ b/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp @@ -18,6 +18,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -28,16 +29,16 @@ class PreserveLibCallsAndAsmUsed { public: PreserveLibCallsAndAsmUsed(const StringSet<> &AsmUndefinedRefs, const TargetMachine &TM, - SmallPtrSetImpl<const GlobalValue *> &LLVMUsed) + std::vector<GlobalValue *> &LLVMUsed) : AsmUndefinedRefs(AsmUndefinedRefs), TM(TM), LLVMUsed(LLVMUsed) {} - void findInModule(const Module &TheModule) { + void findInModule(Module &TheModule) { initializeLibCalls(TheModule); - for (const Function &F : TheModule) + for (Function &F : TheModule) findLibCallsAndAsm(F); - for (const GlobalVariable &GV : TheModule.globals()) + for (GlobalVariable &GV : TheModule.globals()) findLibCallsAndAsm(GV); - for (const GlobalAlias &GA : TheModule.aliases()) + for (GlobalAlias &GA : TheModule.aliases()) findLibCallsAndAsm(GA); } @@ -51,7 +52,7 @@ private: StringSet<> Libcalls; // Output - SmallPtrSetImpl<const GlobalValue *> &LLVMUsed; + std::vector<GlobalValue *> &LLVMUsed; // Collect names of runtime library functions. User-defined functions with the // same names are added to llvm.compiler.used to prevent them from being @@ -86,7 +87,7 @@ private: } } - void findLibCallsAndAsm(const GlobalValue &GV) { + void findLibCallsAndAsm(GlobalValue &GV) { // There are no restrictions to apply to declarations. if (GV.isDeclaration()) return; @@ -100,13 +101,15 @@ private: // optimizations like -globalopt, causing problems when later optimizations // add new library calls (e.g., llvm.memset => memset and printf => puts). // Leave it to the linker to remove any dead code (e.g. with -dead_strip). - if (isa<Function>(GV) && Libcalls.count(GV.getName())) - LLVMUsed.insert(&GV); + if (isa<Function>(GV) && Libcalls.count(GV.getName())) { + LLVMUsed.push_back(&GV); + return; + } SmallString<64> Buffer; TM.getNameWithPrefix(Buffer, &GV, Mangler); if (AsmUndefinedRefs.count(Buffer)) - LLVMUsed.insert(&GV); + LLVMUsed.push_back(&GV); } }; @@ -114,33 +117,12 @@ private: void llvm::updateCompilerUsed(Module &TheModule, const TargetMachine &TM, const StringSet<> &AsmUndefinedRefs) { - SmallPtrSet<const GlobalValue *, 8> UsedValues; + std::vector<GlobalValue *> UsedValues; PreserveLibCallsAndAsmUsed(AsmUndefinedRefs, TM, UsedValues) .findInModule(TheModule); if (UsedValues.empty()) return; - llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(TheModule.getContext()); - std::vector<Constant *> UsedValuesList; - for (const auto *GV : UsedValues) { - Constant *c = - ConstantExpr::getBitCast(const_cast<GlobalValue *>(GV), i8PTy); - UsedValuesList.push_back(c); - } - - GlobalVariable *LLVMUsed = TheModule.getGlobalVariable("llvm.compiler.used"); - if (LLVMUsed) { - ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); - for (auto &V : Inits->operands()) - UsedValuesList.push_back(cast<Constant>(&V)); - LLVMUsed->eraseFromParent(); - } - - llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, UsedValuesList.size()); - LLVMUsed = new llvm::GlobalVariable( - TheModule, ATy, false, llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, UsedValuesList), "llvm.compiler.used"); - - LLVMUsed->setSection("llvm.metadata"); + appendToCompilerUsed(TheModule, UsedValues); } |