diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Instrumentation')
14 files changed, 1996 insertions, 754 deletions
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f5e9e7d..f8d2552 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -22,9 +22,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,6 +45,7 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" @@ -80,6 +83,7 @@ static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; +static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40; static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; // The shadow memory space is dynamically allocated. static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel; @@ -100,6 +104,10 @@ static const char *const kAsanRegisterImageGlobalsName = "__asan_register_image_globals"; static const char *const kAsanUnregisterImageGlobalsName = "__asan_unregister_image_globals"; +static const char *const kAsanRegisterElfGlobalsName = + "__asan_register_elf_globals"; +static const char *const kAsanUnregisterElfGlobalsName = + "__asan_unregister_elf_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init"; @@ -119,8 +127,11 @@ static const char *const kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; static const char *const kAsanUnpoisonStackMemoryName = "__asan_unpoison_stack_memory"; + +// ASan version script has __asan_* wildcard. Triple underscore prevents a +// linker (gold) warning about attempting to export a local symbol. static const char *const kAsanGlobalsRegisteredFlagName = - "__asan_globals_registered"; + "___asan_globals_registered"; static const char *const kAsanOptionDetectUseAfterReturn = "__asan_option_detect_stack_use_after_return"; @@ -184,6 +195,11 @@ static cl::opt<uint32_t> ClMaxInlinePoisoningSize( static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", cl::desc("Check stack-use-after-return"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClRedzoneByvalArgs("asan-redzone-byval-args", + cl::desc("Create redzones for byval " + "arguments (extra copy " + "required)"), cl::Hidden, + cl::init(true)); static cl::opt<bool> ClUseAfterScope("asan-use-after-scope", cl::desc("Check stack-use-after-scope"), cl::Hidden, cl::init(false)); @@ -264,11 +280,17 @@ static cl::opt<bool> cl::Hidden, cl::init(false)); static cl::opt<bool> - ClUseMachOGlobalsSection("asan-globals-live-support", - cl::desc("Use linker features to support dead " - "code stripping of globals " - "(Mach-O only)"), - cl::Hidden, cl::init(true)); + ClUseGlobalsGC("asan-globals-live-support", + cl::desc("Use linker features to support dead " + "code stripping of globals"), + cl::Hidden, cl::init(true)); + +// This is on by default even though there is a bug in gold: +// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 +static cl::opt<bool> + ClWithComdat("asan-with-comdat", + cl::desc("Place ASan constructors in comdat sections"), + cl::Hidden, cl::init(true)); // Debug flags. static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, @@ -380,6 +402,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, bool IsAndroid = TargetTriple.isAndroid(); bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS(); bool IsFreeBSD = TargetTriple.isOSFreeBSD(); + bool IsPS4CPU = TargetTriple.isPS4CPU(); bool IsLinux = TargetTriple.isOSLinux(); bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 || TargetTriple.getArch() == llvm::Triple::ppc64le; @@ -392,6 +415,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, TargetTriple.getArch() == llvm::Triple::mips64el; bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64; bool IsWindows = TargetTriple.isOSWindows(); + bool IsFuchsia = TargetTriple.isOSFuchsia(); ShadowMapping Mapping; @@ -412,12 +436,18 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, else Mapping.Offset = kDefaultShadowOffset32; } else { // LongSize == 64 - if (IsPPC64) + // Fuchsia is always PIE, which means that the beginning of the address + // space is always available. + if (IsFuchsia) + Mapping.Offset = 0; + else if (IsPPC64) Mapping.Offset = kPPC64_ShadowOffset64; else if (IsSystemZ) Mapping.Offset = kSystemZ_ShadowOffset64; else if (IsFreeBSD) Mapping.Offset = kFreeBSD_ShadowOffset64; + else if (IsPS4CPU) + Mapping.Offset = kPS4CPU_ShadowOffset64; else if (IsLinux && IsX86_64) { if (IsKasan) Mapping.Offset = kLinuxKasan_ShadowOffset64; @@ -456,9 +486,9 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, // offset is not necessary 1/8-th of the address space. On SystemZ, // we could OR the constant in a single instruction, but it's more // efficient to load it once and use indexed addressing. - Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ - && !(Mapping.Offset & (Mapping.Offset - 1)) - && Mapping.Offset != kDynamicShadowSentinel; + Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU && + !(Mapping.Offset & (Mapping.Offset - 1)) && + Mapping.Offset != kDynamicShadowSentinel; return Mapping; } @@ -567,8 +597,6 @@ struct AddressSanitizer : public FunctionPass { Type *IntptrTy; ShadowMapping Mapping; DominatorTree *DT; - Function *AsanCtorFunction = nullptr; - Function *AsanInitFunction = nullptr; Function *AsanHandleNoReturnFunc; Function *AsanPtrCmpFunction, *AsanPtrSubFunction; // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize). @@ -587,22 +615,36 @@ struct AddressSanitizer : public FunctionPass { }; class AddressSanitizerModule : public ModulePass { - public: +public: explicit AddressSanitizerModule(bool CompileKernel = false, - bool Recover = false) + bool Recover = false, + bool UseGlobalsGC = true) : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), - Recover(Recover || ClRecover) {} + Recover(Recover || ClRecover), + UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), + // Not a typo: ClWithComdat is almost completely pointless without + // ClUseGlobalsGC (because then it only works on modules without + // globals, which are rare); it is a prerequisite for ClUseGlobalsGC; + // and both suffer from gold PR19002 for which UseGlobalsGC constructor + // argument is designed as workaround. Therefore, disable both + // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to + // do globals-gc. + UseCtorComdat(UseGlobalsGC && ClWithComdat) {} bool runOnModule(Module &M) override; - static char ID; // Pass identification, replacement for typeid + static char ID; // Pass identification, replacement for typeid StringRef getPassName() const override { return "AddressSanitizerModule"; } private: void initializeCallbacks(Module &M); - bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); + bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat); void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers); + void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M, + ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers, + const std::string &UniqueModuleId); void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers); @@ -613,7 +655,8 @@ private: GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer, StringRef OriginalName); - void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata); + void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata, + StringRef InternalSuffix); IRBuilder<> CreateAsanModuleDtor(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); @@ -628,6 +671,8 @@ private: GlobalsMetadata GlobalsMD; bool CompileKernel; bool Recover; + bool UseGlobalsGC; + bool UseCtorComdat; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; @@ -638,6 +683,11 @@ private: Function *AsanUnregisterGlobals; Function *AsanRegisterImageGlobals; Function *AsanUnregisterImageGlobals; + Function *AsanRegisterElfGlobals; + Function *AsanUnregisterElfGlobals; + + Function *AsanCtorFunction = nullptr; + Function *AsanDtorFunction = nullptr; }; // Stack poisoning does not play well with exception handling. @@ -705,6 +755,10 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { bool runOnFunction() { if (!ClStack) return false; + + if (ClRedzoneByvalArgs && Mapping.Offset != kDynamicShadowSentinel) + copyArgsPassedByValToAllocas(); + // Collect alloca, ret, lifetime instructions etc. for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); @@ -721,6 +775,11 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { return true; } + // Arguments marked with the "byval" attribute are implicitly copied without + // using an alloca instruction. To produce redzones for those arguments, we + // copy them a second time into memory allocated with an alloca instruction. + void copyArgsPassedByValToAllocas(); + // Finds all Alloca instructions and puts // poisoned red zones around all of them. // Then unpoison everything back before the function returns. @@ -906,9 +965,10 @@ INITIALIZE_PASS( "ModulePass", false, false) ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel, - bool Recover) { + bool Recover, + bool UseGlobalsGC) { assert(!CompileKernel || Recover); - return new AddressSanitizerModule(CompileKernel, Recover); + return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -1187,7 +1247,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, if (auto *Vector = dyn_cast<ConstantVector>(Mask)) { // dyn_cast as we might get UndefValue if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) { - if (Masked->isNullValue()) + if (Masked->isZero()) // Mask is constant false, so no instrumentation needed. continue; // If we have a true or undef value, fall through to doInstrumentAddress @@ -1421,8 +1481,13 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, void AddressSanitizerModule::createInitializerPoisonCalls( Module &M, GlobalValue *ModuleName) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return; + + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!CA) + return; - ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); for (Use &OP : CA->operands()) { if (isa<ConstantAggregateZero>(OP)) continue; ConstantStruct *CS = cast<ConstantStruct>(OP); @@ -1530,9 +1595,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // binary in order to allow the linker to properly dead strip. This is only // supported on recent versions of ld64. bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const { - if (!ClUseMachOGlobalsSection) - return false; - if (!TargetTriple.isOSBinFormatMachO()) return false; @@ -1561,38 +1623,48 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { // Declare our poisoning and unpoisoning functions. AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy)); AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr)); + kAsanUnpoisonGlobalsName, IRB.getVoidTy())); AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); // Declare functions that register/unregister globals. AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); AsanUnregisterGlobals = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(), - IntptrTy, IntptrTy, nullptr)); + IntptrTy, IntptrTy)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); // Declare the functions that find globals in a shared object and then invoke // the (un)register function on them. AsanRegisterImageGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); + kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy)); AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage); AsanUnregisterImageGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); + kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy)); AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); + + AsanRegisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage); + + AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage); } // Put the metadata and the instrumented global in the same group. This ensures // that the metadata is discarded if the instrumented global is discarded. void AddressSanitizerModule::SetComdatForGlobalMetadata( - GlobalVariable *G, GlobalVariable *Metadata) { + GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) { Module &M = *G->getParent(); Comdat *C = G->getComdat(); if (!C) { @@ -1602,7 +1674,15 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata( assert(G->hasLocalLinkage()); G->setName(Twine(kAsanGenPrefix) + "_anon_global"); } - C = M.getOrInsertComdat(G->getName()); + + if (!InternalSuffix.empty() && G->hasLocalLinkage()) { + std::string Name = G->getName(); + Name += InternalSuffix; + C = M.getOrInsertComdat(Name); + } else { + C = M.getOrInsertComdat(G->getName()); + } + // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. if (TargetTriple.isOSBinFormatCOFF()) C->setSelectionKind(Comdat::NoDuplicates); @@ -1618,21 +1698,21 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata( GlobalVariable * AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer, StringRef OriginalName) { - GlobalVariable *Metadata = - new GlobalVariable(M, Initializer->getType(), false, - GlobalVariable::InternalLinkage, Initializer, - Twine("__asan_global_") + - GlobalValue::getRealLinkageName(OriginalName)); + auto Linkage = TargetTriple.isOSBinFormatMachO() + ? GlobalVariable::InternalLinkage + : GlobalVariable::PrivateLinkage; + GlobalVariable *Metadata = new GlobalVariable( + M, Initializer->getType(), false, Linkage, Initializer, + Twine("__asan_global_") + GlobalValue::dropLLVMManglingEscape(OriginalName)); Metadata->setSection(getGlobalMetadataSection()); return Metadata; } IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) { - Function *AsanDtorFunction = + AsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*C), false), GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB)); } @@ -1657,10 +1737,69 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF( "global metadata will not be padded appropriately"); Metadata->setAlignment(SizeOfGlobalStruct); - SetComdatForGlobalMetadata(G, Metadata); + SetComdatForGlobalMetadata(G, Metadata, ""); } } +void AddressSanitizerModule::InstrumentGlobalsELF( + IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers, + const std::string &UniqueModuleId) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + + SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size()); + for (size_t i = 0; i < ExtendedGlobals.size(); i++) { + GlobalVariable *G = ExtendedGlobals[i]; + GlobalVariable *Metadata = + CreateMetadataGlobal(M, MetadataInitializers[i], G->getName()); + MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); + Metadata->setMetadata(LLVMContext::MD_associated, MD); + MetadataGlobals[i] = Metadata; + + SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); + } + + // Update llvm.compiler.used, adding the new metadata globals. This is + // needed so that during LTO these variables stay alive. + if (!MetadataGlobals.empty()) + appendToCompilerUsed(M, MetadataGlobals); + + // RegisteredFlag serves two purposes. First, we can pass it to dladdr() + // to look up the loaded image that contains it. Second, we can store in it + // whether registration has already occurred, to prevent duplicate + // registration. + // + // Common linkage ensures that there is only one global per shared library. + GlobalVariable *RegisteredFlag = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::CommonLinkage, + ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); + + // Create start and stop symbols. + GlobalVariable *StartELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__start_" + getGlobalMetadataSection()); + StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + GlobalVariable *StopELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__stop_" + getGlobalMetadataSection()); + StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + + // Create a call to register the globals with the runtime. + IRB.CreateCall(AsanRegisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); + + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. + IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRB_Dtor.CreateCall(AsanUnregisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); +} + void AddressSanitizerModule::InstrumentGlobalsMachO( IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers) { @@ -1669,7 +1808,7 @@ void AddressSanitizerModule::InstrumentGlobalsMachO( // On recent Mach-O platforms, use a structure which binds the liveness of // the global variable to the metadata struct. Keep the list of "Liveness" GV // created to be added to llvm.compiler.used - StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr); + StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy); SmallVector<GlobalValue *, 16> LivenessGlobals(ExtendedGlobals.size()); for (size_t i = 0; i < ExtendedGlobals.size(); i++) { @@ -1680,9 +1819,9 @@ void AddressSanitizerModule::InstrumentGlobalsMachO( // On recent Mach-O platforms, we emit the global metadata in a way that // allows the linker to properly strip dead globals. - auto LivenessBinder = ConstantStruct::get( - LivenessTy, Initializer->getAggregateElement(0u), - ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr); + auto LivenessBinder = + ConstantStruct::get(LivenessTy, Initializer->getAggregateElement(0u), + ConstantExpr::getPointerCast(Metadata, IntptrTy)); GlobalVariable *Liveness = new GlobalVariable( M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder, Twine("__asan_binder_") + G->getName()); @@ -1748,7 +1887,10 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray( // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { +// Sets *CtorComdat to true if the global registration code emitted into the +// asan constructor is comdat-compatible. +bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) { + *CtorComdat = false; GlobalsMD.init(M); SmallVector<GlobalVariable *, 16> GlobalsToChange; @@ -1758,7 +1900,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { } size_t n = GlobalsToChange.size(); - if (n == 0) return false; + if (n == 0) { + *CtorComdat = true; + return false; + } auto &DL = M.getDataLayout(); @@ -1774,7 +1919,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, IntptrTy, IntptrTy, nullptr); + IntptrTy, IntptrTy, IntptrTy); SmallVector<GlobalVariable *, 16> NewGlobals(n); SmallVector<Constant *, 16> Initializers(n); @@ -1810,10 +1955,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); - StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr); - Constant *NewInitializer = - ConstantStruct::get(NewTy, G->getInitializer(), - Constant::getNullValue(RightRedZoneTy), nullptr); + StructType *NewTy = StructType::get(Ty, RightRedZoneTy); + Constant *NewInitializer = ConstantStruct::get( + NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy)); // Create a new global variable with enough space for a redzone. GlobalValue::LinkageTypes Linkage = G->getLinkage(); @@ -1862,7 +2006,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { GlobalValue *InstrumentedGlobal = NewGlobal; bool CanUsePrivateAliases = - TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO(); + TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO() || + TargetTriple.isOSBinFormatWasm(); if (CanUsePrivateAliases && ClUsePrivateAliasForGlobals) { // Create local alias for NewGlobal to avoid crash on ODR between // instrumented and non-instrumented libraries. @@ -1893,7 +2038,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { ConstantExpr::getPointerCast(Name, IntptrTy), ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, - ConstantExpr::getPointerCast(ODRIndicator, IntptrTy), nullptr); + ConstantExpr::getPointerCast(ODRIndicator, IntptrTy)); if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true; @@ -1902,9 +2047,16 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Initializers[i] = Initializer; } - if (TargetTriple.isOSBinFormatCOFF()) { + std::string ELFUniqueModuleId = + (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M) + : ""; + + if (!ELFUniqueModuleId.empty()) { + InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId); + *CtorComdat = true; + } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); - } else if (ShouldUseMachOGlobalsSection()) { + } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) { InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); } else { InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers); @@ -1926,14 +2078,39 @@ bool AddressSanitizerModule::runOnModule(Module &M) { Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel); initializeCallbacks(M); - bool Changed = false; + if (CompileKernel) + return false; + + // Create a module constructor. A destructor is created lazily because not all + // platforms, and not all modules need it. + std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( + M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, + /*InitArgs=*/{}, kAsanVersionCheckName); + bool CtorComdat = true; + bool Changed = false; // TODO(glider): temporarily disabled globals instrumentation for KASan. - if (ClGlobals && !CompileKernel) { - Function *CtorFunc = M.getFunction(kAsanModuleCtorName); - assert(CtorFunc); - IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - Changed |= InstrumentGlobals(IRB, M); + if (ClGlobals) { + IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); + Changed |= InstrumentGlobals(IRB, M, &CtorComdat); + } + + // Put the constructor and destructor in comdat if both + // (1) global instrumentation is not TU-specific + // (2) target is ELF. + if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { + AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, + AsanCtorFunction); + if (AsanDtorFunction) { + AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority, + AsanDtorFunction); + } + } else { + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + if (AsanDtorFunction) + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); } return Changed; @@ -1949,49 +2126,60 @@ void AddressSanitizer::initializeCallbacks(Module &M) { const std::string ExpStr = Exp ? "exp_" : ""; const std::string SuffixStr = CompileKernel ? "N" : "_n"; const std::string EndingStr = Recover ? "_noabort" : ""; - Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; - AsanErrorCallbackSized[AccessIsWrite][Exp] = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + EndingStr, - IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); - AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr, - IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); - for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; - AccessSizeIndex++) { - const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex); - AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr, - IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); - AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr, - IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); + + SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy}; + SmallVector<Type *, 2> Args1{1, IntptrTy}; + if (Exp) { + Type *ExpType = Type::getInt32Ty(*C); + Args2.push_back(ExpType); + Args1.push_back(ExpType); } - } + AsanErrorCallbackSized[AccessIsWrite][Exp] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + + EndingStr, + FunctionType::get(IRB.getVoidTy(), Args2, false))); + + AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr, + FunctionType::get(IRB.getVoidTy(), Args2, false))); + + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; + AccessSizeIndex++) { + const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex); + AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr, + FunctionType::get(IRB.getVoidTy(), Args1, false))); + + AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr, + FunctionType::get(IRB.getVoidTy(), Args1, false))); + } + } } const std::string MemIntrinCallbackPrefix = CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix; AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction( MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy)); AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction( MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy)); AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction( MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy)); AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr)); + M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy())); AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy)); AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), @@ -2001,7 +2189,6 @@ void AddressSanitizer::initializeCallbacks(Module &M) { // virtual bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. - GlobalsMD.init(M); C = &(M.getContext()); @@ -2009,13 +2196,6 @@ bool AddressSanitizer::doInitialization(Module &M) { IntptrTy = Type::getIntNTy(*C, LongSize); TargetTriple = Triple(M.getTargetTriple()); - if (!CompileKernel) { - std::tie(AsanCtorFunction, AsanInitFunction) = - createSanitizerCtorAndInitFunctions( - M, kAsanModuleCtorName, kAsanInitName, - /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); - } Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel); return true; } @@ -2034,6 +2214,8 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // We cannot just ignore these methods, because they may call other // instrumented functions. if (F.getName().find(" load]") != std::string::npos) { + Function *AsanInitFunction = + declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {}); IRBuilder<> IRB(&F.front(), F.front().begin()); IRB.CreateCall(AsanInitFunction, {}); return true; @@ -2081,7 +2263,6 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) { } bool AddressSanitizer::runOnFunction(Function &F) { - if (&F == AsanCtorFunction) return false; if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false; if (F.getName().startswith("__asan_")) return false; @@ -2175,8 +2356,9 @@ bool AddressSanitizer::runOnFunction(Function &F) { (ClInstrumentationWithCallsThreshold >= 0 && ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold); const DataLayout &DL = F.getParent()->getDataLayout(); - ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), - /*RoundToAlign=*/true); + ObjectSizeOpts ObjSizeOpts; + ObjSizeOpts.RoundToAlign = true; + ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), ObjSizeOpts); // Instrument. int NumInstrumented = 0; @@ -2234,18 +2416,18 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { std::string Suffix = itostr(i); AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy, - IntptrTy, nullptr)); + IntptrTy)); AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix, - IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + IRB.getVoidTy(), IntptrTy, IntptrTy)); } if (ASan.UseAfterScope) { AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(), - IntptrTy, IntptrTy, nullptr)); + IntptrTy, IntptrTy)); AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), - IntptrTy, IntptrTy, nullptr)); + IntptrTy, IntptrTy)); } for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) { @@ -2254,14 +2436,14 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { Name << std::setw(2) << std::setfill('0') << std::hex << Val; AsanSetShadowFunc[Val] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy)); } AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy)); AsanAllocasUnpoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy)); } void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask, @@ -2363,6 +2545,28 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) { llvm_unreachable("impossible LocalStackSize"); } +void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { + BasicBlock &FirstBB = *F.begin(); + IRBuilder<> IRB(&FirstBB, FirstBB.getFirstInsertionPt()); + const DataLayout &DL = F.getParent()->getDataLayout(); + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) { + Type *Ty = Arg.getType()->getPointerElementType(); + unsigned Align = Arg.getParamAlignment(); + if (Align == 0) Align = DL.getABITypeAlignment(Ty); + + const std::string &Name = Arg.hasName() ? Arg.getName().str() : + "Arg" + llvm::to_string(Arg.getArgNo()); + AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval"); + AI->setAlignment(Align); + Arg.replaceAllUsesWith(AI); + + uint64_t AllocSize = DL.getTypeAllocSize(Ty); + IRB.CreateMemCpy(AI, &Arg, AllocSize, Align); + } + } +} + PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue, Instruction *ThenTerm, @@ -2566,7 +2770,7 @@ void FunctionStackPoisoner::processStaticAllocas() { Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); - replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref); AI->replaceAllUsesWith(NewAllocaPtr); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index d4c8369..a193efe 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetFolder.h" @@ -25,6 +24,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" using namespace llvm; #define DEBUG_TYPE "bounds-checking" diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h index 3802f9f..16e2e6b 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h +++ b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -24,10 +27,10 @@ #include <utility> #include <vector> -namespace llvm { - #define DEBUG_TYPE "cfgmst" +namespace llvm { + /// \brief An union-find based Minimum Spanning Tree for CFG /// /// Implements a Union-find algorithm to compute Minimum Spanning Tree @@ -220,5 +223,8 @@ public: } }; -#undef DEBUG_TYPE // "cfgmst" } // end namespace llvm + +#undef DEBUG_TYPE // "cfgmst" + +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index b34d5b8..ddc975c 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -44,15 +44,14 @@ /// For more information, please refer to the design document: /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" @@ -63,6 +62,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/SpecialCaseList.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -254,7 +254,7 @@ class DataFlowSanitizer : public ModulePass { MDNode *ColdCallWeights; DFSanABIList ABIList; DenseMap<Value *, Function *> UnwrappedFnMap; - AttributeSet ReadOnlyNoneAttrs; + AttrBuilder ReadOnlyNoneAttrs; bool DFSanRuntimeShadowMask; Value *getShadowAddress(Value *Addr, Instruction *Pos); @@ -331,6 +331,10 @@ class DFSanVisitor : public InstVisitor<DFSanVisitor> { DFSanFunction &DFSF; DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} + const DataLayout &getDataLayout() const { + return DFSF.F->getParent()->getDataLayout(); + } + void visitOperandShadowInst(Instruction &I); void visitBinaryOperator(BinaryOperator &BO); @@ -384,7 +388,7 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { ArgTypes.push_back(ShadowPtrTy); Type *RetType = T->getReturnType(); if (!RetType->isVoidTy()) - RetType = StructType::get(RetType, ShadowTy, (Type *)nullptr); + RetType = StructType::get(RetType, ShadowTy); return FunctionType::get(RetType, ArgTypes, T->isVarArg()); } @@ -472,16 +476,14 @@ bool DataFlowSanitizer::doInitialization(Module &M) { GetArgTLS = ConstantExpr::getIntToPtr( ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)), PointerType::getUnqual( - FunctionType::get(PointerType::getUnqual(ArgTLSTy), - (Type *)nullptr))); + FunctionType::get(PointerType::getUnqual(ArgTLSTy), false))); } if (GetRetvalTLSPtr) { RetvalTLS = nullptr; GetRetvalTLS = ConstantExpr::getIntToPtr( ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)), PointerType::getUnqual( - FunctionType::get(PointerType::getUnqual(ShadowTy), - (Type *)nullptr))); + FunctionType::get(PointerType::getUnqual(ShadowTy), false))); } ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); @@ -539,16 +541,13 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, F->getParent()); NewF->copyAttributesFrom(F); NewF->removeAttributes( - AttributeSet::ReturnIndex, - AttributeSet::get(F->getContext(), AttributeSet::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType()))); + AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewFT->getReturnType())); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); if (F->isVarArg()) { - NewF->removeAttributes( - AttributeSet::FunctionIndex, - AttributeSet().addAttribute(*Ctx, AttributeSet::FunctionIndex, - "split-stack")); + NewF->removeAttributes(AttributeList::FunctionIndex, + AttrBuilder().addAttribute("split-stack")); CallInst::Create(DFSanVarargWrapperFn, IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", BB); @@ -580,8 +579,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, Function::arg_iterator AI = F->arg_begin(); ++AI; for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N) Args.push_back(&*AI); - CallInst *CI = - CallInst::Create(&F->getArgumentList().front(), Args, "", BB); + CallInst *CI = CallInst::Create(&*F->arg_begin(), Args, "", BB); ReturnInst *RI; if (FT->getReturnType()->isVoidTy()) RI = ReturnInst::Create(*Ctx, BB); @@ -595,7 +593,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, DFSanVisitor(DFSF).visitCallInst(*CI); if (!FT->getReturnType()->isVoidTy()) new StoreInst(DFSF.getShadow(RI->getReturnValue()), - &F->getArgumentList().back(), RI); + &*std::prev(F->arg_end()), RI); } return C; @@ -622,33 +620,33 @@ bool DataFlowSanitizer::runOnModule(Module &M) { DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy); if (Function *F = dyn_cast<Function>(DFSanUnionFn)) { - F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); - F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); - F->addAttribute(1, Attribute::ZExt); - F->addAttribute(2, Attribute::ZExt); + F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); + F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); + F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); + F->addParamAttr(1, Attribute::ZExt); } DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy); if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) { - F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); - F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); - F->addAttribute(1, Attribute::ZExt); - F->addAttribute(2, Attribute::ZExt); + F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); + F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); + F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); + F->addParamAttr(1, Attribute::ZExt); } DFSanUnionLoadFn = Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy); if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) { - F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly); - F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); + F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly); + F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); } DFSanUnimplementedFn = Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); DFSanSetLabelFn = Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy); if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) { - F->addAttribute(1, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); } DFSanNonzeroLabelFn = Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); @@ -694,9 +692,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { } } - AttrBuilder B; - B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); - ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B); + ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) + .addAttribute(Attribute::ReadNone); // First, change the ABI of every function in the module. ABI-listed // functions keep their original ABI and get a wrapper function. @@ -717,9 +714,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M); NewF->copyAttributesFrom(&F); NewF->removeAttributes( - AttributeSet::ReturnIndex, - AttributeSet::get(NewF->getContext(), AttributeSet::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType()))); + AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewFT->getReturnType())); for (Function::arg_iterator FArg = F.arg_begin(), NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); @@ -758,7 +754,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { &F, std::string("dfsw$") + std::string(F.getName()), GlobalValue::LinkOnceODRLinkage, NewFT); if (getInstrumentedABI() == IA_TLS) - NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs); + NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs); Value *WrappedFnCst = ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); @@ -906,7 +902,7 @@ Value *DFSanFunction::getShadow(Value *V) { break; } case DataFlowSanitizer::IA_Args: { - unsigned ArgIdx = A->getArgNo() + F->getArgumentList().size() / 2; + unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2; Function::arg_iterator i = F->arg_begin(); while (ArgIdx--) ++i; @@ -983,9 +979,9 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { IRBuilder<> IRB(Pos); if (AvoidNewBlocks) { CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2}); - Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); + Call->addParamAttr(0, Attribute::ZExt); + Call->addParamAttr(1, Attribute::ZExt); CCS.Block = Pos->getParent(); CCS.Shadow = Call; @@ -996,9 +992,9 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT)); IRBuilder<> ThenIRB(BI); CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2}); - Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); + Call->addParamAttr(0, Attribute::ZExt); + Call->addParamAttr(1, Attribute::ZExt); BasicBlock *Tail = BI->getSuccessor(0); PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); @@ -1099,7 +1095,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, CallInst *FallbackCall = FallbackIRB.CreateCall( DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)}); - FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); // Compare each of the shadows stored in the loaded 64 bits to each other, // by computing (WideShadow rotl ShadowWidth) == WideShadow. @@ -1156,7 +1152,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, IRBuilder<> IRB(Pos); CallInst *FallbackCall = IRB.CreateCall( DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)}); - FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); return FallbackCall; } @@ -1446,7 +1442,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { // Custom functions returning non-void will write to the return label. if (!FT->getReturnType()->isVoidTy()) { - CustomFn->removeAttributes(AttributeSet::FunctionIndex, + CustomFn->removeAttributes(AttributeList::FunctionIndex, DFSF.DFS.ReadOnlyNoneAttrs); } } @@ -1474,6 +1470,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } i = CS.arg_begin(); + const unsigned ShadowArgStart = Args.size(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) Args.push_back(DFSF.getShadow(*i)); @@ -1481,7 +1478,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) { auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, CS.arg_size() - FT->getNumParams()); auto *LabelVAAlloca = new AllocaInst( - LabelVATy, "labelva", &DFSF.F->getEntryBlock().front()); + LabelVATy, getDataLayout().getAllocaAddrSpace(), + "labelva", &DFSF.F->getEntryBlock().front()); for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); @@ -1494,8 +1492,9 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (!FT->getReturnType()->isVoidTy()) { if (!DFSF.LabelReturnAlloca) { DFSF.LabelReturnAlloca = - new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn", - &DFSF.F->getEntryBlock().front()); + new AllocaInst(DFSF.DFS.ShadowTy, + getDataLayout().getAllocaAddrSpace(), + "labelreturn", &DFSF.F->getEntryBlock().front()); } Args.push_back(DFSF.LabelReturnAlloca); } @@ -1507,6 +1506,15 @@ void DFSanVisitor::visitCallSite(CallSite CS) { CustomCI->setCallingConv(CI->getCallingConv()); CustomCI->setAttributes(CI->getAttributes()); + // Update the parameter attributes of the custom call instruction to + // zero extend the shadow parameters. This is required for targets + // which consider ShadowTy an illegal type. + for (unsigned n = 0; n < FT->getNumParams(); n++) { + const unsigned ArgNo = ShadowArgStart + n; + if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy) + CustomCI->addParamAttr(ArgNo, Attribute::ZExt); + } + if (!FT->getReturnType()->isVoidTy()) { LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca); DFSF.setShadow(CustomCI, LabelLoad); @@ -1574,7 +1582,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) { unsigned VarArgSize = CS.arg_size() - FT->getNumParams(); ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); AllocaInst *VarArgShadow = - new AllocaInst(VarArgArrayTy, "", &DFSF.F->getEntryBlock().front()); + new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(), + "", &DFSF.F->getEntryBlock().front()); Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); for (unsigned n = 0; i != e; ++i, ++n) { IRB.CreateStore( @@ -1593,7 +1602,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } NewCS.setCallingConv(CS.getCallingConv()); NewCS.setAttributes(CS.getAttributes().removeAttributes( - *DFSF.DFS.Ctx, AttributeSet::ReturnIndex, + *DFSF.DFS.Ctx, AttributeList::ReturnIndex, AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType()))); if (Next) { diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp index 05eba6c..6864d29 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp @@ -18,7 +18,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -32,6 +31,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -267,35 +267,35 @@ void EfficiencySanitizer::initializeCallbacks(Module &M) { SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr); EsanAlignedLoad[Idx] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy())); SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr); EsanAlignedStore[Idx] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy())); SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr); EsanUnalignedLoad[Idx] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy())); SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr); EsanUnalignedStore[Idx] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy())); } EsanUnalignedLoadN = checkSanitizerInterfaceFunction( M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IntptrTy)); EsanUnalignedStoreN = checkSanitizerInterfaceFunction( M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IntptrTy)); MemmoveFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IntptrTy)); MemcpyFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IntptrTy)); MemsetFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt32Ty(), IntptrTy, nullptr)); + IRB.getInt32Ty(), IntptrTy)); } bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) { @@ -398,8 +398,8 @@ GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( // u64 *ArrayCounter; // }; auto *StructInfoTy = - StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy, - Int8PtrPtrTy, Int64PtrTy, Int64PtrTy, nullptr); + StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy, + Int8PtrPtrTy, Int64PtrTy, Int64PtrTy); auto *StructInfoPtrTy = StructInfoTy->getPointerTo(); // This structure should be kept consistent with the CacheFragInfo struct // in the runtime library. @@ -408,8 +408,7 @@ GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( // u32 NumStructs; // StructInfo *Structs; // }; - auto *CacheFragInfoTy = - StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy, nullptr); + auto *CacheFragInfoTy = StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy); std::vector<StructType *> Vec = M.getIdentifiedStructTypes(); unsigned NumStructs = 0; @@ -457,24 +456,23 @@ GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0); ArrayCounterIdx[1] = ConstantInt::get(Int32Ty, getArrayCounterIdx(StructTy)); - Initializers.push_back( - ConstantStruct::get( - StructInfoTy, - ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy), - ConstantInt::get(Int32Ty, - DL.getStructLayout(StructTy)->getSizeInBytes()), - ConstantInt::get(Int32Ty, StructTy->getNumElements()), - Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) : - ConstantExpr::getPointerCast(Offset, Int32PtrTy), - Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) : - ConstantExpr::getPointerCast(Size, Int32PtrTy), - TypeName == nullptr ? ConstantPointerNull::get(Int8PtrPtrTy) : - ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy), - ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, - FieldCounterIdx), - ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, - ArrayCounterIdx), - nullptr)); + Initializers.push_back(ConstantStruct::get( + StructInfoTy, + ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy), + ConstantInt::get(Int32Ty, + DL.getStructLayout(StructTy)->getSizeInBytes()), + ConstantInt::get(Int32Ty, StructTy->getNumElements()), + Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) + : ConstantExpr::getPointerCast(Offset, Int32PtrTy), + Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) + : ConstantExpr::getPointerCast(Size, Int32PtrTy), + TypeName == nullptr + ? ConstantPointerNull::get(Int8PtrPtrTy) + : ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy), + ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, + FieldCounterIdx), + ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, + ArrayCounterIdx))); } // Structs. Constant *StructInfo; @@ -491,11 +489,8 @@ GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( auto *CacheFragInfoGV = new GlobalVariable( M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage, - ConstantStruct::get(CacheFragInfoTy, - UnitName, - ConstantInt::get(Int32Ty, NumStructs), - StructInfo, - nullptr)); + ConstantStruct::get(CacheFragInfoTy, UnitName, + ConstantInt::get(Int32Ty, NumStructs), StructInfo)); return CacheFragInfoGV; } @@ -533,7 +528,7 @@ void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) { IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator()); Function *EsanExit = checkSanitizerInterfaceFunction( M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(), - Int8PtrTy, nullptr)); + Int8PtrTy)); EsanExit->setLinkage(Function::ExternalLinkage); IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg}); appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority); @@ -757,7 +752,7 @@ bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) { return false; } Type *SourceTy = GepInst->getSourceElementType(); - StructType *StructTy; + StructType *StructTy = nullptr; ConstantInt *Idx; // Check if GEP calculates address from a struct array. if (isa<StructType>(SourceTy)) { diff --git a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 1ba13bd..4089d81 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -1,4 +1,4 @@ -//===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===// +//===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===// // // The LLVM Compiler Infrastructure // @@ -17,6 +17,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/IndirectCallSiteVisitor.h" #include "llvm/IR/BasicBlock.h" @@ -40,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -65,13 +68,13 @@ static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden, // For debug use only. static cl::opt<unsigned> ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore, - cl::desc("Max number of promotions for this compilaiton")); + cl::desc("Max number of promotions for this compilation")); // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. // For debug use only. static cl::opt<unsigned> ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore, - cl::desc("Skip Callsite up to this number for this compilaiton")); + cl::desc("Skip Callsite up to this number for this compilation")); // Set if the pass is called in LTO optimization. The difference for LTO mode // is the pass won't prefix the source module name to the internal linkage @@ -80,6 +83,12 @@ static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode")); +// Set if the pass is called in SamplePGO mode. The difference for SamplePGO +// mode is it will add prof metadatato the created direct call. +static cl::opt<bool> + ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, + cl::desc("Run indirect-call promotion in SamplePGO mode")); + // If the option is set to true, only call instructions will be considered for // transformation -- invoke instructions will be ignored. static cl::opt<bool> @@ -105,8 +114,8 @@ class PGOIndirectCallPromotionLegacyPass : public ModulePass { public: static char ID; - PGOIndirectCallPromotionLegacyPass(bool InLTO = false) - : ModulePass(ID), InLTO(InLTO) { + PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false) + : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) { initializePGOIndirectCallPromotionLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -119,6 +128,10 @@ private: // If this pass is called in LTO. We need to special handling the PGOFuncName // for the static variables due to LTO's internalization. bool InLTO; + + // If this pass is called in SamplePGO. We need to add the prof metadata to + // the promoted direct call. + bool SamplePGO; }; } // end anonymous namespace @@ -128,8 +141,9 @@ INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", "direct calls.", false, false) -ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO) { - return new PGOIndirectCallPromotionLegacyPass(InLTO); +ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, + bool SamplePGO) { + return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); } namespace { @@ -144,17 +158,11 @@ private: // defines. InstrProfSymtab *Symtab; - enum TargetStatus { - OK, // Should be able to promote. - NotAvailableInModule, // Cannot find the target in current module. - ReturnTypeMismatch, // Return type mismatch b/w target and indirect-call. - NumArgsMismatch, // Number of arguments does not match. - ArgTypeMismatch // Type mismatch in the arguments (cannot bitcast). - }; + bool SamplePGO; // Test if we can legally promote this direct-call of Target. - TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target, - Function *&F); + bool isPromotionLegal(Instruction *Inst, uint64_t Target, Function *&F, + const char **Reason = nullptr); // A struct that records the direct target and it's call count. struct PromotionCandidate { @@ -172,91 +180,77 @@ private: Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef, uint64_t TotalCount, uint32_t NumCandidates); - // Main function that transforms Inst (either a indirect-call instruction, or - // an invoke instruction , to a conditional call to F. This is like: - // if (Inst.CalledValue == F) - // F(...); - // else - // Inst(...); - // end - // TotalCount is the profile count value that the instruction executes. - // Count is the profile count value that F is the target function. - // These two values are being used to update the branch weight. - void promote(Instruction *Inst, Function *F, uint64_t Count, - uint64_t TotalCount); - // Promote a list of targets for one indirect-call callsite. Return // the number of promotions. uint32_t tryToPromote(Instruction *Inst, const std::vector<PromotionCandidate> &Candidates, uint64_t &TotalCount); - static const char *StatusToString(const TargetStatus S) { - switch (S) { - case OK: - return "OK to promote"; - case NotAvailableInModule: - return "Cannot find the target"; - case ReturnTypeMismatch: - return "Return type mismatch"; - case NumArgsMismatch: - return "The number of arguments mismatch"; - case ArgTypeMismatch: - return "Argument Type mismatch"; - } - llvm_unreachable("Should not reach here"); - } - // Noncopyable ICallPromotionFunc(const ICallPromotionFunc &other) = delete; ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete; public: - ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab) - : F(Func), M(Modu), Symtab(Symtab) { - } + ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab, + bool SamplePGO) + : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO) {} bool processFunction(); }; } // end anonymous namespace -ICallPromotionFunc::TargetStatus -ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target, - Function *&TargetFunction) { - Function *DirectCallee = Symtab->getFunction(Target); - if (DirectCallee == nullptr) - return NotAvailableInModule; +bool llvm::isLegalToPromote(Instruction *Inst, Function *F, + const char **Reason) { // Check the return type. Type *CallRetType = Inst->getType(); if (!CallRetType->isVoidTy()) { - Type *FuncRetType = DirectCallee->getReturnType(); + Type *FuncRetType = F->getReturnType(); if (FuncRetType != CallRetType && - !CastInst::isBitCastable(FuncRetType, CallRetType)) - return ReturnTypeMismatch; + !CastInst::isBitCastable(FuncRetType, CallRetType)) { + if (Reason) + *Reason = "Return type mismatch"; + return false; + } } // Check if the arguments are compatible with the parameters - FunctionType *DirectCalleeType = DirectCallee->getFunctionType(); + FunctionType *DirectCalleeType = F->getFunctionType(); unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); CallSite CS(Inst); unsigned ArgNum = CS.arg_size(); - if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) - return NumArgsMismatch; + if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) { + if (Reason) + *Reason = "The number of arguments mismatch"; + return false; + } for (unsigned I = 0; I < ParamNum; ++I) { Type *PTy = DirectCalleeType->getFunctionParamType(I); Type *ATy = CS.getArgument(I)->getType(); if (PTy == ATy) continue; - if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) - return ArgTypeMismatch; + if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) { + if (Reason) + *Reason = "Argument type mismatch"; + return false; + } } DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to " - << Symtab->getFuncName(Target) << "\n"); - TargetFunction = DirectCallee; - return OK; + << F->getName() << "\n"); + return true; +} + +bool ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target, + Function *&TargetFunction, + const char **Reason) { + TargetFunction = Symtab->getFunction(Target); + if (TargetFunction == nullptr) { + *Reason = "Cannot find the target"; + return false; + } + return isLegalToPromote(Inst, TargetFunction, Reason); } // Indirect-call promotion heuristic. The direct targets are sorted based on @@ -296,10 +290,9 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( break; } Function *TargetFunction = nullptr; - TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction); - if (Status != OK) { + const char *Reason = nullptr; + if (!isPromotionLegal(Inst, Target, TargetFunction, &Reason)) { StringRef TargetFuncName = Symtab->getFuncName(Target); - const char *Reason = StatusToString(Status); DEBUG(dbgs() << " Not promote: " << Reason << "\n"); emitOptimizationRemarkMissed( F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(), @@ -532,8 +525,14 @@ static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult, // Ret = phi(Ret1, Ret2); // It adds type casts for the args do not match the parameters and the return // value. Branch weights metadata also updated. -void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee, - uint64_t Count, uint64_t TotalCount) { +// If \p AttachProfToDirectCall is true, a prof metadata is attached to the +// new direct call to contain \p Count. This is used by SamplePGO inliner to +// check callsite hotness. +// Returns the promoted direct call instruction. +Instruction *llvm::promoteIndirectCall(Instruction *Inst, + Function *DirectCallee, uint64_t Count, + uint64_t TotalCount, + bool AttachProfToDirectCall) { assert(DirectCallee != nullptr); BasicBlock *BB = Inst->getParent(); // Just to suppress the non-debug build warning. @@ -548,6 +547,14 @@ void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee, Instruction *NewInst = createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB); + if (AttachProfToDirectCall) { + SmallVector<uint32_t, 1> Weights; + Weights.push_back(Count); + MDBuilder MDB(NewInst->getContext()); + dyn_cast<Instruction>(NewInst->stripPointerCasts()) + ->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + } + // Move Inst from MergeBB to IndirectCallBB. Inst->removeFromParent(); IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(), @@ -576,9 +583,10 @@ void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee, DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n"); emitOptimizationRemark( - F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(), + BB->getContext(), "pgo-icall-prom", *BB->getParent(), Inst->getDebugLoc(), Twine("Promote indirect call to ") + DirectCallee->getName() + " with count " + Twine(Count) + " out of " + Twine(TotalCount)); + return NewInst; } // Promote indirect-call to conditional direct-call for one callsite. @@ -589,7 +597,7 @@ uint32_t ICallPromotionFunc::tryToPromote( for (auto &C : Candidates) { uint64_t Count = C.Count; - promote(Inst, C.TargetFunction, Count, TotalCount); + promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO); assert(TotalCount >= Count); TotalCount -= Count; NumOfPGOICallPromotion++; @@ -630,18 +638,23 @@ bool ICallPromotionFunc::processFunction() { } // A wrapper function that does the actual work. -static bool promoteIndirectCalls(Module &M, bool InLTO) { +static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) { if (DisableICP) return false; InstrProfSymtab Symtab; - Symtab.create(M, InLTO); + if (Error E = Symtab.create(M, InLTO)) { + std::string SymtabFailure = toString(std::move(E)); + DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n"); + (void)SymtabFailure; + return false; + } bool Changed = false; for (auto &F : M) { if (F.isDeclaration()) continue; if (F.hasFnAttribute(Attribute::OptimizeNone)) continue; - ICallPromotionFunc ICallPromotion(F, &M, &Symtab); + ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO); bool FuncChanged = ICallPromotion.processFunction(); if (ICPDUMPAFTER && FuncChanged) { DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); @@ -658,11 +671,14 @@ static bool promoteIndirectCalls(Module &M, bool InLTO) { bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) { // Command-line option has the priority for InLTO. - return promoteIndirectCalls(M, InLTO | ICPLTOMode); + return promoteIndirectCalls(M, InLTO | ICPLTOMode, + SamplePGO | ICPSamplePGOMode); } -PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, ModuleAnalysisManager &AM) { - if (!promoteIndirectCalls(M, InLTO | ICPLTOMode)) +PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, + ModuleAnalysisManager &AM) { + if (!promoteIndirectCalls(M, InLTO | ICPLTOMode, + SamplePGO | ICPSamplePGOMode)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index adea7e7..db8fa89 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -14,18 +14,63 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/InstrProfiling.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <string> using namespace llvm; #define DEBUG_TYPE "instrprof" +// The start and end values of precise value profile range for memory +// intrinsic sizes +cl::opt<std::string> MemOPSizeRange( + "memop-size-range", + cl::desc("Set the range of size in memory intrinsic calls to be profiled " + "precisely, in a format of <start_val>:<end_val>"), + cl::init("")); + +// The value that considered to be large value in memory intrinsic. +cl::opt<unsigned> MemOPSizeLarge( + "memop-size-large", + cl::desc("Set large value thresthold in memory intrinsic size profiling. " + "Value of 0 disables the large value profiling."), + cl::init(8192)); + namespace { cl::opt<bool> DoNameCompression("enable-name-compression", @@ -41,6 +86,7 @@ cl::opt<bool> ValueProfileStaticAlloc( "vp-static-alloc", cl::desc("Do static counter allocation for value profiler"), cl::init(true)); + cl::opt<double> NumCountersPerValueSite( "vp-counters-per-site", cl::desc("The average number of profile counters allocated " @@ -51,14 +97,56 @@ cl::opt<double> NumCountersPerValueSite( // is usually smaller than 2. cl::init(1.0)); +cl::opt<bool> AtomicCounterUpdatePromoted( + "atomic-counter-update-promoted", cl::ZeroOrMore, + cl::desc("Do counter update using atomic fetch add " + " for promoted counters only"), + cl::init(false)); + +// If the option is not specified, the default behavior about whether +// counter promotion is done depends on how instrumentaiton lowering +// pipeline is setup, i.e., the default value of true of this option +// does not mean the promotion will be done by default. Explicitly +// setting this option can override the default behavior. +cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore, + cl::desc("Do counter register promotion"), + cl::init(false)); +cl::opt<unsigned> MaxNumOfPromotionsPerLoop( + cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20), + cl::desc("Max number counter promotions per loop to avoid" + " increasing register pressure too much")); + +// A debug option +cl::opt<int> + MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1), + cl::desc("Max number of allowed counter promotions")); + +cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting( + cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3), + cl::desc("The max number of exiting blocks of a loop to allow " + " speculative counter promotion")); + +cl::opt<bool> SpeculativeCounterPromotionToLoop( + cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false), + cl::desc("When the option is false, if the target block is in a loop, " + "the promotion will be disallowed unless the promoted counter " + " update can be further/iteratively promoted into an acyclic " + " region.")); + +cl::opt<bool> IterativeCounterPromotion( + cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true), + cl::desc("Allow counter promotion across the whole loop nest.")); + class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; public: static char ID; - InstrProfilingLegacyPass() : ModulePass(ID), InstrProf() {} + + InstrProfilingLegacyPass() : ModulePass(ID) {} InstrProfilingLegacyPass(const InstrProfOptions &Options) : ModulePass(ID), InstrProf(Options) {} + StringRef getPassName() const override { return "Frontend instrumentation-based coverage lowering"; } @@ -73,7 +161,184 @@ public: } }; -} // anonymous namespace +/// +/// A helper class to promote one counter RMW operation in the loop +/// into register update. +/// +/// RWM update for the counter will be sinked out of the loop after +/// the transformation. +/// +class PGOCounterPromoterHelper : public LoadAndStorePromoter { +public: + PGOCounterPromoterHelper( + Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init, + BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks, + ArrayRef<Instruction *> InsertPts, + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, + LoopInfo &LI) + : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), + InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) { + assert(isa<LoadInst>(L)); + assert(isa<StoreInst>(S)); + SSA.AddAvailableValue(PH, Init); + } + + void doExtraRewritesBeforeFinalDeletion() const override { + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + Instruction *InsertPos = InsertPts[i]; + // Get LiveIn value into the ExitBlock. If there are multiple + // predecessors, the value is defined by a PHI node in this + // block. + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Value *Addr = cast<StoreInst>(Store)->getPointerOperand(); + IRBuilder<> Builder(InsertPos); + if (AtomicCounterUpdatePromoted) + // automic update currently can only be promoted across the current + // loop, not the whole loop nest. + Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, + AtomicOrdering::SequentiallyConsistent); + else { + LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted"); + auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); + auto *NewStore = Builder.CreateStore(NewVal, Addr); + + // Now update the parent loop's candidate list: + if (IterativeCounterPromotion) { + auto *TargetLoop = LI.getLoopFor(ExitBlock); + if (TargetLoop) + LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore); + } + } + } + } + +private: + Instruction *Store; + ArrayRef<BasicBlock *> ExitBlocks; + ArrayRef<Instruction *> InsertPts; + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; + LoopInfo &LI; +}; + +/// A helper class to do register promotion for all profile counter +/// updates in a loop. +/// +class PGOCounterPromoter { +public: + PGOCounterPromoter( + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, + Loop &CurLoop, LoopInfo &LI) + : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), + LI(LI) { + + SmallVector<BasicBlock *, 8> LoopExitBlocks; + SmallPtrSet<BasicBlock *, 8> BlockSet; + L.getExitBlocks(LoopExitBlocks); + + for (BasicBlock *ExitBlock : LoopExitBlocks) { + if (BlockSet.insert(ExitBlock).second) { + ExitBlocks.push_back(ExitBlock); + InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); + } + } + } + + bool run(int64_t *NumPromoted) { + unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); + if (MaxProm == 0) + return false; + + unsigned Promoted = 0; + for (auto &Cand : LoopToCandidates[&L]) { + + SmallVector<PHINode *, 4> NewPHIs; + SSAUpdater SSA(&NewPHIs); + Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, + L.getLoopPreheader(), ExitBlocks, + InsertPts, LoopToCandidates, LI); + Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second})); + Promoted++; + if (Promoted >= MaxProm) + break; + + (*NumPromoted)++; + if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) + break; + } + + DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" + << L.getLoopDepth() << ")\n"); + return Promoted != 0; + } + +private: + bool allowSpeculativeCounterPromotion(Loop *LP) { + SmallVector<BasicBlock *, 8> ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + // Not considierered speculative. + if (ExitingBlocks.size() == 1) + return true; + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return false; + return true; + } + + // Returns the max number of Counter Promotions for LP. + unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { + // We can't insert into a catchswitch. + SmallVector<BasicBlock *, 8> LoopExitBlocks; + LP->getExitBlocks(LoopExitBlocks); + if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { + return isa<CatchSwitchInst>(Exit->getTerminator()); + })) + return 0; + + if (!LP->hasDedicatedExits()) + return 0; + + BasicBlock *PH = LP->getLoopPreheader(); + if (!PH) + return 0; + + SmallVector<BasicBlock *, 8> ExitingBlocks; + LP->getExitingBlocks(ExitingBlocks); + // Not considierered speculative. + if (ExitingBlocks.size() == 1) + return MaxNumOfPromotionsPerLoop; + + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return 0; + + // Whether the target block is in a loop does not matter: + if (SpeculativeCounterPromotionToLoop) + return MaxNumOfPromotionsPerLoop; + + // Now check the target block: + unsigned MaxProm = MaxNumOfPromotionsPerLoop; + for (auto *TargetBlock : LoopExitBlocks) { + auto *TargetLoop = LI.getLoopFor(TargetBlock); + if (!TargetLoop) + continue; + unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop); + unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size(); + MaxProm = + std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) - + PendingCandsInTarget); + } + return MaxProm; + } + + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; + SmallVector<BasicBlock *, 8> ExitBlocks; + SmallVector<Instruction *, 8> InsertPts; + Loop &L; + LoopInfo &LI; +}; + +} // end anonymous namespace PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { auto &TLI = AM.getResult<TargetLibraryAnalysis>(M); @@ -97,35 +362,70 @@ llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) { return new InstrProfilingLegacyPass(Options); } -bool InstrProfiling::isMachO() const { - return Triple(M->getTargetTriple()).isOSBinFormatMachO(); +static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { + InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr); + if (Inc) + return Inc; + return dyn_cast<InstrProfIncrementInst>(Instr); } -/// Get the section name for the counter variables. -StringRef InstrProfiling::getCountersSection() const { - return getInstrProfCountersSectionName(isMachO()); -} +bool InstrProfiling::lowerIntrinsics(Function *F) { + bool MadeChange = false; + PromotionCandidates.clear(); + for (BasicBlock &BB : *F) { + for (auto I = BB.begin(), E = BB.end(); I != E;) { + auto Instr = I++; + InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); + if (Inc) { + lowerIncrement(Inc); + MadeChange = true; + } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) { + lowerValueProfileInst(Ind); + MadeChange = true; + } + } + } -/// Get the section name for the name variables. -StringRef InstrProfiling::getNameSection() const { - return getInstrProfNameSectionName(isMachO()); -} + if (!MadeChange) + return false; -/// Get the section name for the profile data variables. -StringRef InstrProfiling::getDataSection() const { - return getInstrProfDataSectionName(isMachO()); + promoteCounterLoadStores(F); + return true; } -/// Get the section name for the coverage mapping data. -StringRef InstrProfiling::getCoverageSection() const { - return getInstrProfCoverageSectionName(isMachO()); +bool InstrProfiling::isCounterPromotionEnabled() const { + if (DoCounterPromotion.getNumOccurrences() > 0) + return DoCounterPromotion; + + return Options.DoCounterPromotion; } -static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { - InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr); - if (Inc) - return Inc; - return dyn_cast<InstrProfIncrementInst>(Instr); +void InstrProfiling::promoteCounterLoadStores(Function *F) { + if (!isCounterPromotionEnabled()) + return; + + DominatorTree DT(*F); + LoopInfo LI(DT); + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates; + + for (const auto &LoadStore : PromotionCandidates) { + auto *CounterLoad = LoadStore.first; + auto *CounterStore = LoadStore.second; + BasicBlock *BB = CounterLoad->getParent(); + Loop *ParentLoop = LI.getLoopFor(BB); + if (!ParentLoop) + continue; + LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore); + } + + SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder(); + + // Do a post-order traversal of the loops so that counter updates can be + // iteratively hoisted outside the loop nest. + for (auto *Loop : llvm::reverse(Loops)) { + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); + Promoter.run(&TotalCountersPromoted); + } } bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { @@ -137,6 +437,9 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { NamesSize = 0; ProfileDataMap.clear(); UsedVars.clear(); + getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart, + MemOPSizeRangeLast); + TT = Triple(M.getTargetTriple()); // We did not know how many value sites there would be inside // the instrumented function. This is counting the number of instrumented @@ -157,18 +460,7 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { } for (Function &F : M) - for (BasicBlock &BB : F) - for (auto I = BB.begin(), E = BB.end(); I != E;) { - auto Instr = I++; - InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); - if (Inc) { - lowerIncrement(Inc); - MadeChange = true; - } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) { - lowerValueProfileInst(Ind); - MadeChange = true; - } - } + MadeChange |= lowerIntrinsics(&F); if (GlobalVariable *CoverageNamesVar = M.getNamedGlobal(getCoverageUnusedNamesVarName())) { @@ -189,26 +481,42 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { } static Constant *getOrInsertValueProfilingCall(Module &M, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + bool IsRange = false) { LLVMContext &Ctx = M.getContext(); auto *ReturnTy = Type::getVoidTy(M.getContext()); - Type *ParamTypes[] = { + + Constant *Res; + if (!IsRange) { + Type *ParamTypes[] = { #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType #include "llvm/ProfileData/InstrProfData.inc" - }; - auto *ValueProfilingCallTy = - FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); - Constant *Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(), - ValueProfilingCallTy); + }; + auto *ValueProfilingCallTy = + FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); + Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(), + ValueProfilingCallTy); + } else { + Type *RangeParamTypes[] = { +#define VALUE_RANGE_PROF 1 +#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType +#include "llvm/ProfileData/InstrProfData.inc" +#undef VALUE_RANGE_PROF + }; + auto *ValueRangeProfilingCallTy = + FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false); + Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(), + ValueRangeProfilingCallTy); + } + if (Function *FunRes = dyn_cast<Function>(Res)) { if (auto AK = TLI.getExtAttrForI32Param(false)) - FunRes->addAttribute(3, AK); + FunRes->addParamAttr(2, AK); } return Res; } void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { - GlobalVariable *Name = Ind->getName(); uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); uint64_t Index = Ind->getIndex()->getZExtValue(); @@ -222,7 +530,6 @@ void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { } void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { - GlobalVariable *Name = Ind->getName(); auto It = ProfileDataMap.find(Name); assert(It != ProfileDataMap.end() && It->second.DataVar && @@ -235,13 +542,27 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Index += It->second.NumValueSites[Kind]; IRBuilder<> Builder(Ind); - Value *Args[3] = {Ind->getTargetValue(), - Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), - Builder.getInt32(Index)}; - CallInst *Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), - Args); + bool IsRange = (Ind->getValueKind()->getZExtValue() == + llvm::InstrProfValueKind::IPVK_MemOPSize); + CallInst *Call = nullptr; + if (!IsRange) { + Value *Args[3] = {Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index)}; + Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args); + } else { + Value *Args[6] = { + Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index), + Builder.getInt64(MemOPSizeRangeStart), + Builder.getInt64(MemOPSizeRangeLast), + Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)}; + Call = + Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args); + } if (auto AK = TLI->getExtAttrForI32Param(false)) - Call->addAttribute(3, AK); + Call->addParamAttr(2, AK); Ind->replaceAllUsesWith(Call); Ind->eraseFromParent(); } @@ -252,14 +573,16 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { IRBuilder<> Builder(Inc); uint64_t Index = Inc->getIndex()->getZExtValue(); Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); - Value *Count = Builder.CreateLoad(Addr, "pgocount"); - Count = Builder.CreateAdd(Count, Inc->getStep()); - Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr)); + Value *Load = Builder.CreateLoad(Addr, "pgocount"); + auto *Count = Builder.CreateAdd(Load, Inc->getStep()); + auto *Store = Builder.CreateStore(Count, Addr); + Inc->replaceAllUsesWith(Store); + if (isCounterPromotionEnabled()) + PromotionCandidates.emplace_back(cast<Instruction>(Load), Store); Inc->eraseFromParent(); } void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { - ConstantArray *Names = cast<ConstantArray>(CoverageNamesVar->getInitializer()); for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) { @@ -270,7 +593,9 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { Name->setLinkage(GlobalValue::PrivateLinkage); ReferencedNames.push_back(Name); + NC->dropAllReferences(); } + CoverageNamesVar->eraseFromParent(); } /// Get the name of a profiling variable for a particular function. @@ -291,14 +616,24 @@ static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { static inline bool shouldRecordFunctionAddr(Function *F) { // Check the linkage + bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage(); if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && - !F->hasAvailableExternallyLinkage()) + !HasAvailableExternallyLinkage) return true; + + // A function marked 'alwaysinline' with available_externally linkage can't + // have its address taken. Doing so would create an undefined external ref to + // the function, which would fail to link. + if (HasAvailableExternallyLinkage && + F->hasFnAttribute(Attribute::AlwaysInline)) + return false; + // Prohibit function address recording if the function is both internal and // COMDAT. This avoids the profile data variable referencing internal symbols // in COMDAT. if (F->hasLocalLinkage() && F->hasComdat()) return false; + // Check uses of this function for other than direct calls or invokes to it. // Inline virtual functions have linkeOnceODR linkage. When a key method // exists, the vtable will only be emitted in the TU where the key method @@ -367,7 +702,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { Constant::getNullValue(CounterTy), getVarName(Inc, getInstrProfCountersVarPrefix())); CounterPtr->setVisibility(NamePtr->getVisibility()); - CounterPtr->setSection(getCountersSection()); + CounterPtr->setSection( + getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); CounterPtr->setAlignment(8); CounterPtr->setComdat(ProfileVarsComdat); @@ -376,7 +712,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { // the current function. Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy); if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) { - uint64_t NS = 0; for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) NS += PD.NumValueSites[Kind]; @@ -388,11 +723,12 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { Constant::getNullValue(ValuesTy), getVarName(Inc, getInstrProfValuesVarPrefix())); ValuesVar->setVisibility(NamePtr->getVisibility()); - ValuesVar->setSection(getInstrProfValuesSectionName(isMachO())); + ValuesVar->setSection( + getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); ValuesVar->setAlignment(8); ValuesVar->setComdat(ProfileVarsComdat); ValuesPtrExpr = - ConstantExpr::getBitCast(ValuesVar, llvm::Type::getInt8PtrTy(Ctx)); + ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); } } @@ -421,7 +757,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { ConstantStruct::get(DataTy, DataVals), getVarName(Inc, getInstrProfDataVarPrefix())); Data->setVisibility(NamePtr->getVisibility()); - Data->setSection(getDataSection()); + Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT); Data->setComdat(ProfileVarsComdat); @@ -481,9 +817,10 @@ void InstrProfiling::emitVNodes() { ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters); auto *VNodesVar = new GlobalVariable( - *M, VNodesTy, false, llvm::GlobalValue::PrivateLinkage, + *M, VNodesTy, false, GlobalValue::PrivateLinkage, Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); - VNodesVar->setSection(getInstrProfVNodesSectionName(isMachO())); + VNodesVar->setSection( + getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat())); UsedVars.push_back(VNodesVar); } @@ -496,18 +833,22 @@ void InstrProfiling::emitNameData() { std::string CompressedNameStr; if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr, DoNameCompression)) { - llvm::report_fatal_error(toString(std::move(E)), false); + report_fatal_error(toString(std::move(E)), false); } auto &Ctx = M->getContext(); - auto *NamesVal = llvm::ConstantDataArray::getString( + auto *NamesVal = ConstantDataArray::getString( Ctx, StringRef(CompressedNameStr), false); - NamesVar = new llvm::GlobalVariable(*M, NamesVal->getType(), true, - llvm::GlobalValue::PrivateLinkage, - NamesVal, getInstrProfNamesVarName()); + NamesVar = new GlobalVariable(*M, NamesVal->getType(), true, + GlobalValue::PrivateLinkage, NamesVal, + getInstrProfNamesVarName()); NamesSize = CompressedNameStr.size(); - NamesVar->setSection(getNameSection()); + NamesVar->setSection( + getInstrProfSectionName(IPSK_name, TT.getObjectFormat())); UsedVars.push_back(NamesVar); + + for (auto *NamePtr : ReferencedNames) + NamePtr->eraseFromParent(); } void InstrProfiling::emitRegistration() { @@ -550,7 +891,6 @@ void InstrProfiling::emitRegistration() { } void InstrProfiling::emitRuntimeHook() { - // We expect the linker to be invoked with -u<hook_var> flag for linux, // for which case there is no need to emit the user function. if (Triple(M->getTargetTriple()).isOSLinux()) @@ -600,7 +940,6 @@ void InstrProfiling::emitInitialization() { GlobalVariable *ProfileNameVar = new GlobalVariable( *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage, ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)); - Triple TT(M->getTargetTriple()); if (TT.supportsCOMDAT()) { ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); ProfileNameVar->setComdat(M->getOrInsertComdat( diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 2963d08..7bb62d2 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -63,6 +63,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializePGOInstrumentationGenLegacyPassPass(Registry); initializePGOInstrumentationUseLegacyPassPass(Registry); initializePGOIndirectCallPromotionLegacyPassPass(Registry); + initializePGOMemOPSizeOptLegacyPassPass(Registry); initializeInstrProfilingLegacyPassPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 363539b..4eb758c 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H -#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/IR/BasicBlock.h" @@ -108,4 +108,4 @@ namespace llvm { } // End llvm namespace -#endif +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index fafb0fc..b7c6271 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -425,7 +425,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) { // which is not yet implemented. StringRef WarningFnName = Recover ? "__msan_warning" : "__msan_warning_noreturn"; - WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), nullptr); + WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy()); for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { @@ -433,31 +433,31 @@ void MemorySanitizer::initializeCallbacks(Module &M) { std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize); MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), - IRB.getInt32Ty(), nullptr); + IRB.getInt32Ty()); FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), - IRB.getInt8PtrTy(), IRB.getInt32Ty(), nullptr); + IRB.getInt8PtrTy(), IRB.getInt32Ty()); } MsanSetAllocaOrigin4Fn = M.getOrInsertFunction( "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, - IRB.getInt8PtrTy(), IntptrTy, nullptr); + IRB.getInt8PtrTy(), IntptrTy); MsanPoisonStackFn = M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr); + IRB.getInt8PtrTy(), IntptrTy); MsanChainOriginFn = M.getOrInsertFunction( - "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty(), nullptr); + "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty()); MemmoveFn = M.getOrInsertFunction( "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr); + IRB.getInt8PtrTy(), IntptrTy); MemcpyFn = M.getOrInsertFunction( "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IntptrTy, nullptr); + IntptrTy); MemsetFn = M.getOrInsertFunction( "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), - IntptrTy, nullptr); + IntptrTy); // Create globals. RetvalTLS = new GlobalVariable( @@ -1037,15 +1037,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { OriginMap[V] = Origin; } + Constant *getCleanShadow(Type *OrigTy) { + Type *ShadowTy = getShadowTy(OrigTy); + if (!ShadowTy) + return nullptr; + return Constant::getNullValue(ShadowTy); + } + /// \brief Create a clean shadow value for a given value. /// /// Clean shadow (all zeroes) means all bits of the value are defined /// (initialized). Constant *getCleanShadow(Value *V) { - Type *ShadowTy = getShadowTy(V); - if (!ShadowTy) - return nullptr; - return Constant::getNullValue(ShadowTy); + return getCleanShadow(V->getType()); } /// \brief Create a dirty shadow of a given shadow type. @@ -1572,13 +1576,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy, bool Signed = false) { Type *srcTy = V->getType(); + size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); + size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); + if (srcSizeInBits > 1 && dstSizeInBits == 1) + return IRB.CreateICmpNE(V, getCleanShadow(V)); + if (dstTy->isIntegerTy() && srcTy->isIntegerTy()) return IRB.CreateIntCast(V, dstTy, Signed); if (dstTy->isVectorTy() && srcTy->isVectorTy() && dstTy->getVectorNumElements() == srcTy->getVectorNumElements()) return IRB.CreateIntCast(V, dstTy, Signed); - size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); - size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits)); Value *V2 = IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed); @@ -1942,7 +1949,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); - // FIXME: use ClStoreCleanOrigin // FIXME: factor out common code from materializeStores if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), getOriginPtr(Addr, IRB, 1)); @@ -2081,6 +2087,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { switch (I.getNumArgOperands()) { case 3: assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode"); + LLVM_FALLTHROUGH; case 2: CopyOp = I.getArgOperand(0); ConvertOp = I.getArgOperand(1); @@ -2325,11 +2332,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } + void handleStmxcsr(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Value* Addr = I.getArgOperand(0); + Type *Ty = IRB.getInt32Ty(); + Value *ShadowPtr = getShadowPtr(Addr, Ty, IRB); + + IRB.CreateStore(getCleanShadow(Ty), + IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo())); + + if (ClCheckAccessAddress) + insertShadowCheck(Addr, &I); + } + + void handleLdmxcsr(IntrinsicInst &I) { + if (!InsertChecks) return; + + IRBuilder<> IRB(&I); + Value *Addr = I.getArgOperand(0); + Type *Ty = IRB.getInt32Ty(); + unsigned Alignment = 1; + + if (ClCheckAccessAddress) + insertShadowCheck(Addr, &I); + + Value *Shadow = IRB.CreateAlignedLoad(getShadowPtr(Addr, Ty, IRB), + Alignment, "_ldmxcsr"); + Value *Origin = MS.TrackOrigins + ? IRB.CreateLoad(getOriginPtr(Addr, IRB, Alignment)) + : getCleanOrigin(); + insertShadowCheck(Shadow, Origin, &I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case llvm::Intrinsic::bswap: handleBswap(I); break; + case llvm::Intrinsic::x86_sse_stmxcsr: + handleStmxcsr(I); + break; + case llvm::Intrinsic::x86_sse_ldmxcsr: + handleLdmxcsr(I); + break; case llvm::Intrinsic::x86_avx512_vcvtsd2usi64: case llvm::Intrinsic::x86_avx512_vcvtsd2usi32: case llvm::Intrinsic::x86_avx512_vcvtss2usi64: @@ -2566,10 +2611,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - Func->removeAttributes(AttributeSet::FunctionIndex, - AttributeSet::get(Func->getContext(), - AttributeSet::FunctionIndex, - B)); + Func->removeAttributes(AttributeList::FunctionIndex, B); } maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI); @@ -2597,12 +2639,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { " Shadow: " << *ArgShadow << "\n"); bool ArgIsInitialized = false; const DataLayout &DL = F.getParent()->getDataLayout(); - if (CS.paramHasAttr(i + 1, Attribute::ByVal)) { + if (CS.paramHasAttr(i, Attribute::ByVal)) { assert(A->getType()->isPointerTy() && "ByVal argument is not a pointer!"); Size = DL.getTypeAllocSize(A->getType()->getPointerElementType()); if (ArgOffset + Size > kParamTLSSize) break; - unsigned ParamAlignment = CS.getParamAlignment(i + 1); + unsigned ParamAlignment = CS.getParamAlignment(i); unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); Store = IRB.CreateMemCpy(ArgShadowBase, getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), @@ -2690,7 +2732,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } else { Value *Shadow = getShadow(RetVal); IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); - // FIXME: make it conditional if ClStoreCleanOrigin==0 if (MS.TrackOrigins) IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB)); } @@ -2717,15 +2758,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOrigin(&I, getCleanOrigin()); IRBuilder<> IRB(I.getNextNode()); const DataLayout &DL = F.getParent()->getDataLayout(); - uint64_t Size = DL.getTypeAllocSize(I.getAllocatedType()); + uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType()); + Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize); + if (I.isArrayAllocation()) + Len = IRB.CreateMul(Len, I.getArraySize()); if (PoisonStack && ClPoisonStackWithCall) { IRB.CreateCall(MS.MsanPoisonStackFn, - {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), - ConstantInt::get(MS.IntptrTy, Size)}); + {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len}); } else { Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB); Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0); - IRB.CreateMemSet(ShadowBase, PoisonValue, Size, I.getAlignment()); + IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment()); } if (PoisonStack && MS.TrackOrigins) { @@ -2742,8 +2785,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { StackDescription.str()); IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn, - {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), - ConstantInt::get(MS.IntptrTy, Size), + {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len, IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()), IRB.CreatePointerCast(&F, MS.IntptrTy)}); } @@ -2876,8 +2918,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (ClDumpStrictInstructions) dumpInst(I); DEBUG(dbgs() << "DEFAULT: " << I << "\n"); - for (size_t i = 0, n = I.getNumOperands(); i < n; i++) - insertShadowCheck(I.getOperand(i), &I); + for (size_t i = 0, n = I.getNumOperands(); i < n; i++) { + Value *Operand = I.getOperand(i); + if (Operand->getType()->isSized()) + insertShadowCheck(Operand, &I); + } setShadow(&I, getCleanShadow(&I)); setOrigin(&I, getCleanOrigin()); } @@ -2935,7 +2980,7 @@ struct VarArgAMD64Helper : public VarArgHelper { Value *A = *ArgIt; unsigned ArgNo = CS.getArgumentNo(ArgIt); bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); - bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal); + bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal); if (IsByVal) { // ByVal arguments always go to the overflow area. // Fixed arguments passed through the overflow area will be stepped @@ -2994,7 +3039,7 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVAStartInst(VAStartInst &I) override { - if (F.getCallingConv() == CallingConv::X86_64_Win64) + if (F.getCallingConv() == CallingConv::Win64) return; IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); @@ -3008,7 +3053,7 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVACopyInst(VACopyInst &I) override { - if (F.getCallingConv() == CallingConv::X86_64_Win64) + if (F.getCallingConv() == CallingConv::Win64) return; IRBuilder<> IRB(&I); Value *VAListTag = I.getArgOperand(0); @@ -3456,12 +3501,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper { Value *A = *ArgIt; unsigned ArgNo = CS.getArgumentNo(ArgIt); bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); - bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal); + bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal); if (IsByVal) { assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); - uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1); + uint64_t ArgAlign = CS.getParamAlignment(ArgNo); if (ArgAlign < 8) ArgAlign = 8; VAArgOffset = alignTo(VAArgOffset, ArgAlign); @@ -3618,9 +3663,7 @@ bool MemorySanitizer::runOnFunction(Function &F) { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - F.removeAttributes(AttributeSet::FunctionIndex, - AttributeSet::get(F.getContext(), - AttributeSet::FunctionIndex, B)); + F.removeAttributes(AttributeList::FunctionIndex, B); return Visitor.runOnFunction(); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 04f9a64..8e4bfc0 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -58,8 +58,10 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/IndirectCallSiteVisitor.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -71,7 +73,9 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" #include "llvm/Support/JamCRC.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -87,6 +91,7 @@ using namespace llvm; STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); +STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); STATISTIC(NumOfPGOEdge, "Number of edges."); STATISTIC(NumOfPGOBB, "Number of basic-blocks."); STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); @@ -116,6 +121,13 @@ static cl::opt<unsigned> MaxNumAnnotations( cl::desc("Max number of annotations for a single indirect " "call callsite")); +// Command line option to set the maximum number of value annotations +// to write to the metadata for a single memop intrinsic. +static cl::opt<unsigned> MaxNumMemOPAnnotations( + "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of preicise value annotations for a single memop" + "intrinsic")); + // Command line option to control appending FunctionHash to the name of a COMDAT // function. This is to avoid the hash mismatch caused by the preinliner. static cl::opt<bool> DoComdatRenaming( @@ -125,26 +137,102 @@ static cl::opt<bool> DoComdatRenaming( // Command line option to enable/disable the warning about missing profile // information. -static cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", - cl::init(false), - cl::Hidden); +static cl::opt<bool> + PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, + cl::desc("Use this option to turn on/off " + "warnings about missing profile data for " + "functions.")); // Command line option to enable/disable the warning about a hash mismatch in // the profile data. -static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), - cl::Hidden); +static cl::opt<bool> + NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, + cl::desc("Use this option to turn off/on " + "warnings about profile cfg mismatch.")); // Command line option to enable/disable the warning about a hash mismatch in // the profile data for Comdat functions, which often turns out to be false // positive due to the pre-instrumentation inline. -static cl::opt<bool> NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", - cl::init(true), cl::Hidden); +static cl::opt<bool> + NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), + cl::Hidden, + cl::desc("The option is used to turn on/off " + "warnings about hash mismatch for comdat " + "functions.")); // Command line option to enable/disable select instruction instrumentation. -static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true), - cl::Hidden); +static cl::opt<bool> + PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, + cl::desc("Use this option to turn on/off SELECT " + "instruction instrumentation. ")); + +// Command line option to turn on CFG dot dump of raw profile counts +static cl::opt<bool> + PGOViewRawCounts("pgo-view-raw-counts", cl::init(false), cl::Hidden, + cl::desc("A boolean option to show CFG dag " + "with raw profile counts from " + "profile data. See also option " + "-pgo-view-counts. To limit graph " + "display to only one function, use " + "filtering option -view-bfi-func-name.")); + +// Command line option to enable/disable memop intrinsic call.size profiling. +static cl::opt<bool> + PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, + cl::desc("Use this option to turn on/off " + "memory intrinsic size profiling.")); + +// Emit branch probability as optimization remarks. +static cl::opt<bool> + EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, + cl::desc("When this option is on, the annotated " + "branch probability will be emitted as " + " optimization remarks: -Rpass-analysis=" + "pgo-instr-use")); + +// Command line option to turn on CFG dot dump after profile annotation. +// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts +extern cl::opt<bool> PGOViewCounts; + +// Command line option to specify the name of the function for CFG dump +// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= +extern cl::opt<std::string> ViewBlockFreqFuncName; + namespace { +// Return a string describing the branch condition that can be +// used in static branch probability heuristics: +std::string getBranchCondString(Instruction *TI) { + BranchInst *BI = dyn_cast<BranchInst>(TI); + if (!BI || !BI->isConditional()) + return std::string(); + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI) + return std::string(); + + std::string result; + raw_string_ostream OS(result); + OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; + CI->getOperand(0)->getType()->print(OS, true); + + Value *RHS = CI->getOperand(1); + ConstantInt *CV = dyn_cast<ConstantInt>(RHS); + if (CV) { + if (CV->isZero()) + OS << "_Zero"; + else if (CV->isOne()) + OS << "_One"; + else if (CV->isMinusOne()) + OS << "_MinusOne"; + else + OS << "_Const"; + } + OS.flush(); + return result; +} + /// The select instruction visitor plays three roles specified /// by the mode. In \c VM_counting mode, it simply counts the number of /// select instructions. In \c VM_instrument mode, it inserts code to count @@ -167,6 +255,7 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { SelectInstVisitor(Function &Func) : F(Func) {} void countSelects(Function &Func) { + NSIs = 0; Mode = VM_counting; visit(Func); } @@ -196,9 +285,54 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { void annotateOneSelectInst(SelectInst &SI); // Visit \p SI instruction and perform tasks according to visit mode. void visitSelectInst(SelectInst &SI); + // Return the number of select instructions. This needs be called after + // countSelects(). unsigned getNumOfSelectInsts() const { return NSIs; } }; +/// Instruction Visitor class to visit memory intrinsic calls. +struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> { + Function &F; + unsigned NMemIs = 0; // Number of memIntrinsics instrumented. + VisitMode Mode = VM_counting; // Visiting mode. + unsigned CurCtrId = 0; // Current counter index. + unsigned TotalNumCtrs = 0; // Total number of counters + GlobalVariable *FuncNameVar = nullptr; + uint64_t FuncHash = 0; + PGOUseFunc *UseFunc = nullptr; + std::vector<Instruction *> Candidates; + + MemIntrinsicVisitor(Function &Func) : F(Func) {} + + void countMemIntrinsics(Function &Func) { + NMemIs = 0; + Mode = VM_counting; + visit(Func); + } + + void instrumentMemIntrinsics(Function &Func, unsigned TotalNC, + GlobalVariable *FNV, uint64_t FHash) { + Mode = VM_instrument; + TotalNumCtrs = TotalNC; + FuncHash = FHash; + FuncNameVar = FNV; + visit(Func); + } + + std::vector<Instruction *> findMemIntrinsics(Function &Func) { + Candidates.clear(); + Mode = VM_annotate; + visit(Func); + return Candidates; + } + + // Visit the IR stream and annotate all mem intrinsic call instructions. + void instrumentOneMemIntrinsic(MemIntrinsic &MI); + // Visit \p MI instruction and perform tasks according to visit mode. + void visitMemIntrinsic(MemIntrinsic &SI); + unsigned getNumOfMemIntrinsics() const { return NMemIs; } +}; + class PGOInstrumentationGenLegacyPass : public ModulePass { public: static char ID; @@ -316,8 +450,9 @@ private: std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; public: - std::vector<Instruction *> IndirectCallSites; + std::vector<std::vector<Instruction *>> ValueSites; SelectInstVisitor SIVisitor; + MemIntrinsicVisitor MIVisitor; std::string FuncName; GlobalVariable *FuncNameVar; // CFG hash value for this function. @@ -347,13 +482,16 @@ public: std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0), - MST(F, BPI, BFI) { + : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), + SIVisitor(Func), MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) { // This should be done before CFG hash computation. SIVisitor.countSelects(Func); + MIVisitor.countMemIntrinsics(Func); NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - IndirectCallSites = findIndirectCallSites(Func); + NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func); + ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); FuncName = getPGOFuncName(F); computeCFGHash(); @@ -405,7 +543,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { } JC.update(Indexes); FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | - (uint64_t)IndirectCallSites.size() << 48 | + (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); } @@ -552,7 +690,7 @@ static void instrumentOneFunc( return; unsigned NumIndirectCallSites = 0; - for (auto &I : FuncInfo.IndirectCallSites) { + for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) { CallSite CS(I); Value *Callee = CS.getCalledValue(); DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " @@ -565,10 +703,14 @@ static void instrumentOneFunc( {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), Builder.getInt64(FuncInfo.FunctionHash), Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), - Builder.getInt32(llvm::InstrProfValueKind::IPVK_IndirectCallTarget), + Builder.getInt32(IPVK_IndirectCallTarget), Builder.getInt32(NumIndirectCallSites++)}); } NumOfPGOICall += NumIndirectCallSites; + + // Now instrument memop intrinsic calls. + FuncInfo.MIVisitor.instrumentMemIntrinsics( + F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash); } // This class represents a CFG edge in profile use compilation. @@ -653,8 +795,11 @@ public: // Set the branch weights based on the count values. void setBranchWeights(); - // Annotate the indirect call sites. - void annotateIndirectCallSites(); + // Annotate the value profile call sites all all value kind. + void annotateValueSites(); + + // Annotate the value profile call sites for one value kind. + void annotateValueSites(uint32_t Kind); // The hotness of the function from the profile count. enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; @@ -677,6 +822,8 @@ public: return FuncInfo.findBBInfo(BB); } + Function &getFunc() const { return F; } + private: Function &F; Module *M; @@ -761,7 +908,7 @@ void PGOUseFunc::setInstrumentedCounts( NewEdge1.InMST = true; getBBInfo(InstrBB).setBBInfoCount(CountValue); } - ProfileCountSize = CountFromProfile.size(); + ProfileCountSize = CountFromProfile.size(); CountPosition = I; } @@ -932,21 +1079,6 @@ void PGOUseFunc::populateCounters() { DEBUG(FuncInfo.dumpInfo("after reading profile.")); } -static void setProfMetadata(Module *M, Instruction *TI, - ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { - MDBuilder MDB(M->getContext()); - assert(MaxCount > 0 && "Bad max count"); - uint64_t Scale = calculateCountScale(MaxCount); - SmallVector<unsigned, 4> Weights; - for (const auto &ECI : EdgeCounts) - Weights.push_back(scaleBranchCount(ECI, Scale)); - - DEBUG(dbgs() << "Weight is: "; - for (const auto &W : Weights) { dbgs() << W << " "; } - dbgs() << "\n";); - TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); -} - // Assign the scaled count values to the BB with multiple out edges. void PGOUseFunc::setBranchWeights() { // Generate MD_prof metadata for every branch instruction. @@ -990,8 +1122,8 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), - Builder.getInt64(FuncHash), - Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step}); + Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), + Builder.getInt32(*CurCtrIdx), Step}); ++(*CurCtrIdx); } @@ -1020,9 +1152,9 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { if (SI.getCondition()->getType()->isVectorTy()) return; - NSIs++; switch (Mode) { case VM_counting: + NSIs++; return; case VM_instrument: instrumentOneSelectInst(SI); @@ -1035,35 +1167,79 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { llvm_unreachable("Unknown visiting mode"); } -// Traverse all the indirect callsites and annotate the instructions. -void PGOUseFunc::annotateIndirectCallSites() { +void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { + Module *M = F.getParent(); + IRBuilder<> Builder(&MI); + Type *Int64Ty = Builder.getInt64Ty(); + Type *I8PtrTy = Builder.getInt8PtrTy(); + Value *Length = MI.getLength(); + assert(!dyn_cast<ConstantInt>(Length)); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), + {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), + Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); + ++CurCtrId; +} + +void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) { + if (!PGOInstrMemOP) + return; + Value *Length = MI.getLength(); + // Not instrument constant length calls. + if (dyn_cast<ConstantInt>(Length)) + return; + + switch (Mode) { + case VM_counting: + NMemIs++; + return; + case VM_instrument: + instrumentOneMemIntrinsic(MI); + return; + case VM_annotate: + Candidates.push_back(&MI); + return; + } + llvm_unreachable("Unknown visiting mode"); +} + +// Traverse all valuesites and annotate the instructions for all value kind. +void PGOUseFunc::annotateValueSites() { if (DisableValueProfiling) return; // Create the PGOFuncName meta data. createPGOFuncNameMetadata(F, FuncInfo.FuncName); - unsigned IndirectCallSiteIndex = 0; - auto &IndirectCallSites = FuncInfo.IndirectCallSites; - unsigned NumValueSites = - ProfileRecord.getNumValueSites(IPVK_IndirectCallTarget); - if (NumValueSites != IndirectCallSites.size()) { - std::string Msg = - std::string("Inconsistent number of indirect call sites: ") + - F.getName().str(); + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + annotateValueSites(Kind); +} + +// Annotate the instructions for a specific value kind. +void PGOUseFunc::annotateValueSites(uint32_t Kind) { + unsigned ValueSiteIndex = 0; + auto &ValueSites = FuncInfo.ValueSites[Kind]; + unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); + if (NumValueSites != ValueSites.size()) { auto &Ctx = M->getContext(); - Ctx.diagnose( - DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); + Ctx.diagnose(DiagnosticInfoPGOProfile( + M->getName().data(), + Twine("Inconsistent number of value sites for kind = ") + Twine(Kind) + + " in " + F.getName().str(), + DS_Warning)); return; } - for (auto &I : IndirectCallSites) { - DEBUG(dbgs() << "Read one indirect call instrumentation: Index=" - << IndirectCallSiteIndex << " out of " << NumValueSites - << "\n"); - annotateValueSite(*M, *I, ProfileRecord, IPVK_IndirectCallTarget, - IndirectCallSiteIndex, MaxNumAnnotations); - IndirectCallSiteIndex++; + for (auto &I : ValueSites) { + DEBUG(dbgs() << "Read one value site profile (kind = " << Kind + << "): Index = " << ValueSiteIndex << " out of " + << NumValueSites << "\n"); + annotateValueSite(*M, *I, ProfileRecord, + static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, + Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations + : MaxNumAnnotations); + ValueSiteIndex++; } } } // end anonymous namespace @@ -1196,12 +1372,29 @@ static bool annotateAllFunctions( continue; Func.populateCounters(); Func.setBranchWeights(); - Func.annotateIndirectCallSites(); + Func.annotateValueSites(); PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); if (FreqAttr == PGOUseFunc::FFA_Cold) ColdFunctions.push_back(&F); else if (FreqAttr == PGOUseFunc::FFA_Hot) HotFunctions.push_back(&F); + if (PGOViewCounts && (ViewBlockFreqFuncName.empty() || + F.getName().equals(ViewBlockFreqFuncName))) { + LoopInfo LI{DominatorTree(F)}; + std::unique_ptr<BranchProbabilityInfo> NewBPI = + llvm::make_unique<BranchProbabilityInfo>(F, LI); + std::unique_ptr<BlockFrequencyInfo> NewBFI = + llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); + + NewBFI->view(); + } + if (PGOViewRawCounts && (ViewBlockFreqFuncName.empty() || + F.getName().equals(ViewBlockFreqFuncName))) { + if (ViewBlockFreqFuncName.empty()) + WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); + else + ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); + } } M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); // Set function hotness attribute from the profile. @@ -1257,3 +1450,113 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI); } + +namespace llvm { +void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts, + uint64_t MaxCount) { + MDBuilder MDB(M->getContext()); + assert(MaxCount > 0 && "Bad max count"); + uint64_t Scale = calculateCountScale(MaxCount); + SmallVector<unsigned, 4> Weights; + for (const auto &ECI : EdgeCounts) + Weights.push_back(scaleBranchCount(ECI, Scale)); + + DEBUG(dbgs() << "Weight is: "; + for (const auto &W : Weights) { dbgs() << W << " "; } + dbgs() << "\n";); + TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + if (EmitBranchProbability) { + std::string BrCondStr = getBranchCondString(TI); + if (BrCondStr.empty()) + return; + + unsigned WSum = + std::accumulate(Weights.begin(), Weights.end(), 0, + [](unsigned w1, unsigned w2) { return w1 + w2; }); + uint64_t TotalCount = + std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), 0, + [](uint64_t c1, uint64_t c2) { return c1 + c2; }); + BranchProbability BP(Weights[0], WSum); + std::string BranchProbStr; + raw_string_ostream OS(BranchProbStr); + OS << BP; + OS << " (total count : " << TotalCount << ")"; + OS.flush(); + Function *F = TI->getParent()->getParent(); + emitOptimizationRemarkAnalysis( + F->getContext(), "pgo-use-annot", *F, TI->getDebugLoc(), + Twine(BrCondStr) + + " is true with probability : " + Twine(BranchProbStr)); + } +} + +template <> struct GraphTraits<PGOUseFunc *> { + typedef const BasicBlock *NodeRef; + typedef succ_const_iterator ChildIteratorType; + typedef pointer_iterator<Function::const_iterator> nodes_iterator; + + static NodeRef getEntryNode(const PGOUseFunc *G) { + return &G->getFunc().front(); + } + static ChildIteratorType child_begin(const NodeRef N) { + return succ_begin(N); + } + static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } + static nodes_iterator nodes_begin(const PGOUseFunc *G) { + return nodes_iterator(G->getFunc().begin()); + } + static nodes_iterator nodes_end(const PGOUseFunc *G) { + return nodes_iterator(G->getFunc().end()); + } +}; + +static std::string getSimpleNodeName(const BasicBlock *Node) { + if (!Node->getName().empty()) + return Node->getName(); + + std::string SimpleNodeName; + raw_string_ostream OS(SimpleNodeName); + Node->printAsOperand(OS, false); + return OS.str(); +} + +template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { + explicit DOTGraphTraits(bool isSimple = false) + : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const PGOUseFunc *G) { + return G->getFunc().getName(); + } + + std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { + std::string Result; + raw_string_ostream OS(Result); + + OS << getSimpleNodeName(Node) << ":\\l"; + UseBBInfo *BI = Graph->findBBInfo(Node); + OS << "Count : "; + if (BI && BI->CountValid) + OS << BI->CountValue << "\\l"; + else + OS << "Unknown\\l"; + + if (!PGOInstrSelect) + return Result; + + for (auto BI = Node->begin(); BI != Node->end(); ++BI) { + auto *I = &*BI; + if (!isa<SelectInst>(I)) + continue; + // Display scaled counts for SELECT instruction: + OS << "SELECT : { T = "; + uint64_t TC, FC; + bool HasProf = I->extractProfMetadata(TC, FC); + if (!HasProf) + OS << "Unknown, F = Unknown }\\l"; + else + OS << TC << ", F = " << FC << " }\\l"; + } + return Result; + } +}; +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp new file mode 100644 index 0000000..0bc9ddf --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -0,0 +1,419 @@ +//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the transformation that optimizes memory intrinsics +// such as memcpy using the size value profile. When memory intrinsic size +// value profile metadata is available, a single memory intrinsic is expanded +// to a sequence of guarded specialized versions that are called with the +// hottest size(s), for later expansion into more optimal inline sequences. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/PGOInstrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <cassert> +#include <cstdint> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "pgo-memop-opt" + +STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized."); +STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); + +// The minimum call count to optimize memory intrinsic calls. +static cl::opt<unsigned> + MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, + cl::init(1000), + cl::desc("The minimum count to optimize memory " + "intrinsic calls")); + +// Command line option to disable memory intrinsic optimization. The default is +// false. This is for debug purpose. +static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false), + cl::Hidden, cl::desc("Disable optimize")); + +// The percent threshold to optimize memory intrinsic calls. +static cl::opt<unsigned> + MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), + cl::Hidden, cl::ZeroOrMore, + cl::desc("The percentage threshold for the " + "memory intrinsic calls optimization")); + +// Maximum number of versions for optimizing memory intrinsic call. +static cl::opt<unsigned> + MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, + cl::ZeroOrMore, + cl::desc("The max version for the optimized memory " + " intrinsic calls")); + +// Scale the counts from the annotation using the BB count value. +static cl::opt<bool> + MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, + cl::desc("Scale the memop size counts using the basic " + " block count value")); + +// This option sets the rangge of precise profile memop sizes. +extern cl::opt<std::string> MemOPSizeRange; + +// This option sets the value that groups large memop sizes +extern cl::opt<unsigned> MemOPSizeLarge; + +namespace { +class PGOMemOPSizeOptLegacyPass : public FunctionPass { +public: + static char ID; + + PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { + initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "PGOMemOPSize"; } + +private: + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<BlockFrequencyInfoWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} // end anonymous namespace + +char PGOMemOPSizeOptLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", + "Optimize memory intrinsic using its size value profile", + false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", + "Optimize memory intrinsic using its size value profile", + false, false) + +FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { + return new PGOMemOPSizeOptLegacyPass(); +} + +namespace { +class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> { +public: + MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI) + : Func(Func), BFI(BFI), Changed(false) { + ValueDataArray = + llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2); + // Get the MemOPSize range information from option MemOPSizeRange, + getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, + PreciseRangeLast); + } + bool isChanged() const { return Changed; } + void perform() { + WorkList.clear(); + visit(Func); + + for (auto &MI : WorkList) { + ++NumOfPGOMemOPAnnotate; + if (perform(MI)) { + Changed = true; + ++NumOfPGOMemOPOpt; + DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() + << "is Transformed.\n"); + } + } + } + + void visitMemIntrinsic(MemIntrinsic &MI) { + Value *Length = MI.getLength(); + // Not perform on constant length calls. + if (dyn_cast<ConstantInt>(Length)) + return; + WorkList.push_back(&MI); + } + +private: + Function &Func; + BlockFrequencyInfo &BFI; + bool Changed; + std::vector<MemIntrinsic *> WorkList; + // Start of the previse range. + int64_t PreciseRangeStart; + // Last value of the previse range. + int64_t PreciseRangeLast; + // The space to read the profile annotation. + std::unique_ptr<InstrProfValueData[]> ValueDataArray; + bool perform(MemIntrinsic *MI); + + // This kind shows which group the value falls in. For PreciseValue, we have + // the profile count for that value. LargeGroup groups the values that are in + // range [LargeValue, +inf). NonLargeGroup groups the rest of values. + enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup }; + + MemOPSizeKind getMemOPSizeKind(int64_t Value) const { + if (Value == MemOPSizeLarge && MemOPSizeLarge != 0) + return LargeGroup; + if (Value == PreciseRangeLast + 1) + return NonLargeGroup; + return PreciseValue; + } +}; + +static const char *getMIName(const MemIntrinsic *MI) { + switch (MI->getIntrinsicID()) { + case Intrinsic::memcpy: + return "memcpy"; + case Intrinsic::memmove: + return "memmove"; + case Intrinsic::memset: + return "memset"; + default: + return "unknown"; + } +} + +static bool isProfitable(uint64_t Count, uint64_t TotalCount) { + assert(Count <= TotalCount); + if (Count < MemOPCountThreshold) + return false; + if (Count < TotalCount * MemOPPercentThreshold / 100) + return false; + return true; +} + +static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, + uint64_t Denom) { + if (!MemOPScaleCount) + return Count; + bool Overflowed; + uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed); + return ScaleCount / Denom; +} + +bool MemOPSizeOpt::perform(MemIntrinsic *MI) { + assert(MI); + if (MI->getIntrinsicID() == Intrinsic::memmove) + return false; + + uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; + uint64_t TotalCount; + if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, + ValueDataArray.get(), NumVals, TotalCount)) + return false; + + uint64_t ActualCount = TotalCount; + uint64_t SavedTotalCount = TotalCount; + if (MemOPScaleCount) { + auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); + if (!BBEdgeCount) + return false; + ActualCount = *BBEdgeCount; + } + + ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); + DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount + << "\n"); + DEBUG( + for (auto &VD + : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); + + if (ActualCount < MemOPCountThreshold) + return false; + // Skip if the total value profiled count is 0, in which case we can't + // scale up the counts properly (and there is no profitable transformation). + if (TotalCount == 0) + return false; + + TotalCount = ActualCount; + if (MemOPScaleCount) + DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount + << " denominator = " << SavedTotalCount << "\n"); + + // Keeping track of the count of the default case: + uint64_t RemainCount = TotalCount; + uint64_t SavedRemainCount = SavedTotalCount; + SmallVector<uint64_t, 16> SizeIds; + SmallVector<uint64_t, 16> CaseCounts; + uint64_t MaxCount = 0; + unsigned Version = 0; + // Default case is in the front -- save the slot here. + CaseCounts.push_back(0); + for (auto &VD : VDs) { + int64_t V = VD.Value; + uint64_t C = VD.Count; + if (MemOPScaleCount) + C = getScaledCount(C, ActualCount, SavedTotalCount); + + // Only care precise value here. + if (getMemOPSizeKind(V) != PreciseValue) + continue; + + // ValueCounts are sorted on the count. Break at the first un-profitable + // value. + if (!isProfitable(C, RemainCount)) + break; + + SizeIds.push_back(V); + CaseCounts.push_back(C); + if (C > MaxCount) + MaxCount = C; + + assert(RemainCount >= C); + RemainCount -= C; + assert(SavedRemainCount >= VD.Count); + SavedRemainCount -= VD.Count; + + if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0) + break; + } + + if (Version == 0) + return false; + + CaseCounts[0] = RemainCount; + if (RemainCount > MaxCount) + MaxCount = RemainCount; + + uint64_t SumForOpt = TotalCount - RemainCount; + + DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version + << " Versions (covering " << SumForOpt << " out of " + << TotalCount << ")\n"); + + // mem_op(..., size) + // ==> + // switch (size) { + // case s1: + // mem_op(..., s1); + // goto merge_bb; + // case s2: + // mem_op(..., s2); + // goto merge_bb; + // ... + // default: + // mem_op(..., size); + // goto merge_bb; + // } + // merge_bb: + + BasicBlock *BB = MI->getParent(); + DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); + DEBUG(dbgs() << *BB << "\n"); + auto OrigBBFreq = BFI.getBlockFreq(BB); + + BasicBlock *DefaultBB = SplitBlock(BB, MI); + BasicBlock::iterator It(*MI); + ++It; + assert(It != DefaultBB->end()); + BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); + MergeBB->setName("MemOP.Merge"); + BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); + DefaultBB->setName("MemOP.Default"); + + auto &Ctx = Func.getContext(); + IRBuilder<> IRB(BB); + BB->getTerminator()->eraseFromParent(); + Value *SizeVar = MI->getLength(); + SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); + + // Clear the value profile data. + MI->setMetadata(LLVMContext::MD_prof, nullptr); + // If all promoted, we don't need the MD.prof metadata. + if (SavedRemainCount > 0 || Version != NumVals) + // Otherwise we need update with the un-promoted records back. + annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version), + SavedRemainCount, IPVK_MemOPSize, NumVals); + + DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + + for (uint64_t SizeId : SizeIds) { + ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId); + BasicBlock *CaseBB = BasicBlock::Create( + Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); + Instruction *NewInst = MI->clone(); + // Fix the argument. + dyn_cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId); + CaseBB->getInstList().push_back(NewInst); + IRBuilder<> IRBCase(CaseBB); + IRBCase.CreateBr(MergeBB); + SI->addCase(CaseSizeId, CaseBB); + DEBUG(dbgs() << *CaseBB << "\n"); + } + setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); + + DEBUG(dbgs() << *BB << "\n"); + DEBUG(dbgs() << *DefaultBB << "\n"); + DEBUG(dbgs() << *MergeBB << "\n"); + + emitOptimizationRemark(Func.getContext(), "memop-opt", Func, + MI->getDebugLoc(), + Twine("optimize ") + getMIName(MI) + " with count " + + Twine(SumForOpt) + " out of " + Twine(TotalCount) + + " for " + Twine(Version) + " versions"); + + return true; +} +} // namespace + +static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) { + if (DisableMemOPOPT) + return false; + + if (F.hasFnAttribute(Attribute::OptimizeForSize)) + return false; + MemOPSizeOpt MemOPSizeOpt(F, BFI); + MemOPSizeOpt.perform(); + return MemOPSizeOpt.isChanged(); +} + +bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { + BlockFrequencyInfo &BFI = + getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); + return PGOMemOPSizeOptImpl(F, BFI); +} + +namespace llvm { +char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; + +PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, + FunctionAnalysisManager &FAM) { + auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = PreservedAnalyses(); + PA.preserve<GlobalsAA>(); + return PA; +} +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 5b4b1fb..06fe075 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -7,24 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Coverage instrumentation that works with AddressSanitizer -// and potentially with other Sanitizers. -// -// We create a Guard variable with the same linkage -// as the function and inject this code into the entry block (SCK_Function) -// or all blocks (SCK_BB): -// if (Guard < 0) { -// __sanitizer_cov(&Guard); -// } -// The accesses to Guard are atomic. The rest of the logic is -// in __sanitizer_cov (it's fine to call it more than once). -// -// With SCK_Edge we also split critical edges this effectively -// instrumenting all edges. -// -// This coverage implementation provides very limited data: -// it only tells if a given function (block) was ever executed. No counters. -// But for many use cases this is what we need and the added slowdown small. +// Coverage instrumentation done on LLVM IR level, works with Sanitizers. // //===----------------------------------------------------------------------===// @@ -56,16 +39,8 @@ using namespace llvm; #define DEBUG_TYPE "sancov" -static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init"; -static const char *const SanCovName = "__sanitizer_cov"; -static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check"; -static const char *const SanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const SanCovTracePCIndirName = "__sanitizer_cov_trace_pc_indir"; -static const char *const SanCovTraceEnterName = - "__sanitizer_cov_trace_func_enter"; -static const char *const SanCovTraceBBName = - "__sanitizer_cov_trace_basic_block"; static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1"; static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2"; @@ -78,39 +53,34 @@ static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch"; static const char *const SanCovModuleCtorName = "sancov.module_ctor"; static const uint64_t SanCtorAndDtorPriority = 2; -static const char *const SanCovTracePCGuardSection = "__sancov_guards"; static const char *const SanCovTracePCGuardName = "__sanitizer_cov_trace_pc_guard"; static const char *const SanCovTracePCGuardInitName = "__sanitizer_cov_trace_pc_guard_init"; +static const char *const SanCov8bitCountersInitName = + "__sanitizer_cov_8bit_counters_init"; + +static const char *const SanCovGuardsSectionName = "sancov_guards"; +static const char *const SanCovCountersSectionName = "sancov_cntrs"; static cl::opt<int> ClCoverageLevel( "sanitizer-coverage-level", cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, " - "3: all blocks and critical edges, " - "4: above plus indirect calls"), + "3: all blocks and critical edges"), cl::Hidden, cl::init(0)); -static cl::opt<unsigned> ClCoverageBlockThreshold( - "sanitizer-coverage-block-threshold", - cl::desc("Use a callback with a guard check inside it if there are" - " more than this number of blocks."), - cl::Hidden, cl::init(500)); - -static cl::opt<bool> - ClExperimentalTracing("sanitizer-coverage-experimental-tracing", - cl::desc("Experimental basic-block tracing: insert " - "callbacks at every basic block"), - cl::Hidden, cl::init(false)); - -static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc", - cl::desc("Experimental pc tracing"), - cl::Hidden, cl::init(false)); +static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc", + cl::desc("Experimental pc tracing"), cl::Hidden, + cl::init(false)); static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard", cl::desc("pc tracing with a guard"), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters", + cl::desc("increments 8-bit counter for every edge"), + cl::Hidden, cl::init(false)); + static cl::opt<bool> ClCMPTracing("sanitizer-coverage-trace-compares", cl::desc("Tracing of CMP and similar instructions"), @@ -129,16 +99,6 @@ static cl::opt<bool> cl::desc("Reduce the number of instrumented blocks"), cl::Hidden, cl::init(true)); -// Experimental 8-bit counters used as an additional search heuristic during -// coverage-guided fuzzing. -// The counters are not thread-friendly: -// - contention on these counters may cause significant slowdown; -// - the counter updates are racy and the results may be inaccurate. -// They are also inaccurate due to 8-bit integer overflow. -static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters", - cl::desc("Experimental 8-bit counters"), - cl::Hidden, cl::init(false)); - namespace { SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) { @@ -169,13 +129,15 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel); Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType); Options.IndirectCalls |= CLOpts.IndirectCalls; - Options.TraceBB |= ClExperimentalTracing; Options.TraceCmp |= ClCMPTracing; Options.TraceDiv |= ClDIVTracing; Options.TraceGep |= ClGEPTracing; - Options.Use8bitCounters |= ClUse8bitCounters; - Options.TracePC |= ClExperimentalTracePC; + Options.TracePC |= ClTracePC; Options.TracePCGuard |= ClTracePCGuard; + Options.Inline8bitCounters |= ClInline8bitCounters; + if (!Options.TracePCGuard && !Options.TracePC && !Options.Inline8bitCounters) + Options.TracePCGuard = true; // TracePCGuard is default. + Options.NoPrune |= !ClPruneBlocks; return Options; } @@ -207,90 +169,128 @@ private: void InjectTraceForSwitch(Function &F, ArrayRef<Instruction *> SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks); - void CreateFunctionGuardArray(size_t NumGuards, Function &F); - void SetNoSanitizeMetadata(Instruction *I); - void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, - bool UseCalls); - unsigned NumberOfInstrumentedBlocks() { - return SanCovFunction->getNumUses() + - SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() + - SanCovTraceEnter->getNumUses(); + GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, + Function &F, Type *Ty, + const char *Section); + void CreateFunctionLocalArrays(size_t NumGuards, Function &F); + void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx); + void CreateInitCallForSection(Module &M, const char *InitFunctionName, + Type *Ty, const std::string &Section); + + void SetNoSanitizeMetadata(Instruction *I) { + I->setMetadata(I->getModule()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); } - Function *SanCovFunction; - Function *SanCovWithCheckFunction; - Function *SanCovIndirCallFunction, *SanCovTracePCIndir; - Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC, *SanCovTracePCGuard; + + std::string getSectionName(const std::string &Section) const; + std::string getSectionStart(const std::string &Section) const; + std::string getSectionEnd(const std::string &Section) const; + Function *SanCovTracePCIndir; + Function *SanCovTracePC, *SanCovTracePCGuard; Function *SanCovTraceCmpFunction[4]; Function *SanCovTraceDivFunction[2]; Function *SanCovTraceGepFunction; Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; - Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy; + Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, + *Int8Ty, *Int8PtrTy; Module *CurModule; + Triple TargetTriple; LLVMContext *C; const DataLayout *DL; - GlobalVariable *GuardArray; GlobalVariable *FunctionGuardArray; // for trace-pc-guard. - GlobalVariable *EightBitCounterArray; - bool HasSancovGuardsSection; + GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters. SanitizerCoverageOptions Options; }; } // namespace +void SanitizerCoverageModule::CreateInitCallForSection( + Module &M, const char *InitFunctionName, Type *Ty, + const std::string &Section) { + IRBuilder<> IRB(M.getContext()); + Function *CtorFunc; + GlobalVariable *SecStart = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, + getSectionStart(Section)); + SecStart->setVisibility(GlobalValue::HiddenVisibility); + GlobalVariable *SecEnd = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, + nullptr, getSectionEnd(Section)); + SecEnd->setVisibility(GlobalValue::HiddenVisibility); + + std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( + M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, + {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)}); + + if (TargetTriple.supportsCOMDAT()) { + // Use comdat to dedup CtorFunc. + CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); + } else { + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); + } +} + bool SanitizerCoverageModule::runOnModule(Module &M) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; C = &(M.getContext()); DL = &M.getDataLayout(); CurModule = &M; - HasSancovGuardsSection = false; + TargetTriple = Triple(M.getTargetTriple()); + FunctionGuardArray = nullptr; + Function8bitCounterArray = nullptr; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); IntptrPtrTy = PointerType::getUnqual(IntptrTy); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); - Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64Ty = IRB.getInt64Ty(); Int32Ty = IRB.getInt32Ty(); + Int8Ty = IRB.getInt8Ty(); - SanCovFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy, nullptr)); - SanCovWithCheckFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); SanCovTracePCIndir = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy, nullptr)); - SanCovIndirCallFunction = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); + M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy)); SanCovTraceCmpFunction[0] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty(), nullptr)); + SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty())); SanCovTraceCmpFunction[1] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTraceCmp2, VoidTy, IRB.getInt16Ty(), - IRB.getInt16Ty(), nullptr)); + IRB.getInt16Ty())); SanCovTraceCmpFunction[2] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTraceCmp4, VoidTy, IRB.getInt32Ty(), - IRB.getInt32Ty(), nullptr)); + IRB.getInt32Ty())); SanCovTraceCmpFunction[3] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty, nullptr)); + SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty)); SanCovTraceDivFunction[0] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovTraceDiv4, VoidTy, IRB.getInt32Ty(), nullptr)); + SanCovTraceDiv4, VoidTy, IRB.getInt32Ty())); SanCovTraceDivFunction[1] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovTraceDiv8, VoidTy, Int64Ty, nullptr)); + SanCovTraceDiv8, VoidTy, Int64Ty)); SanCovTraceGepFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovTraceGep, VoidTy, IntptrTy, nullptr)); + SanCovTraceGep, VoidTy, IntptrTy)); SanCovTraceSwitchFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy, nullptr)); + SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy)); + // Make sure smaller parameters are zero-extended to i64 as required by the + // x86_64 ABI. + if (TargetTriple.getArch() == Triple::x86_64) { + for (int i = 0; i < 3; i++) { + SanCovTraceCmpFunction[i]->addParamAttr(0, Attribute::ZExt); + SanCovTraceCmpFunction[i]->addParamAttr(1, Attribute::ZExt); + } + SanCovTraceDivFunction[0]->addParamAttr(0, Attribute::ZExt); + } + // We insert an empty inline asm after cov callbacks to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -298,102 +298,19 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { /*hasSideEffects=*/true); SanCovTracePC = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTracePCName, VoidTy, nullptr)); + M.getOrInsertFunction(SanCovTracePCName, VoidTy)); SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovTracePCGuardName, VoidTy, Int32PtrTy, nullptr)); - SanCovTraceEnter = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy, nullptr)); - SanCovTraceBB = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy, nullptr)); - - // At this point we create a dummy array of guards because we don't - // know how many elements we will need. - Type *Int32Ty = IRB.getInt32Ty(); - Type *Int8Ty = IRB.getInt8Ty(); - - if (!Options.TracePCGuard) - GuardArray = - new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, - nullptr, "__sancov_gen_cov_tmp"); - if (Options.Use8bitCounters) - EightBitCounterArray = - new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage, - nullptr, "__sancov_gen_cov_tmp"); + SanCovTracePCGuardName, VoidTy, Int32PtrTy)); for (auto &F : M) runOnFunction(F); - auto N = NumberOfInstrumentedBlocks(); - - GlobalVariable *RealGuardArray = nullptr; - if (!Options.TracePCGuard) { - // Now we know how many elements we need. Create an array of guards - // with one extra element at the beginning for the size. - Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1); - RealGuardArray = new GlobalVariable( - M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov"); - - // Replace the dummy array with the real one. - GuardArray->replaceAllUsesWith( - IRB.CreatePointerCast(RealGuardArray, Int32PtrTy)); - GuardArray->eraseFromParent(); - } - - GlobalVariable *RealEightBitCounterArray; - if (Options.Use8bitCounters) { - // Make sure the array is 16-aligned. - static const int CounterAlignment = 16; - Type *Int8ArrayNTy = ArrayType::get(Int8Ty, alignTo(N, CounterAlignment)); - RealEightBitCounterArray = new GlobalVariable( - M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter"); - RealEightBitCounterArray->setAlignment(CounterAlignment); - EightBitCounterArray->replaceAllUsesWith( - IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)); - EightBitCounterArray->eraseFromParent(); - } - - // Create variable for module (compilation unit) name - Constant *ModNameStrConst = - ConstantDataArray::getString(M.getContext(), M.getName(), true); - GlobalVariable *ModuleName = new GlobalVariable( - M, ModNameStrConst->getType(), true, GlobalValue::PrivateLinkage, - ModNameStrConst, "__sancov_gen_modname"); - if (Options.TracePCGuard) { - if (HasSancovGuardsSection) { - Function *CtorFunc; - std::string SectionName(SanCovTracePCGuardSection); - GlobalVariable *Bounds[2]; - const char *Prefix[2] = {"__start_", "__stop_"}; - for (int i = 0; i < 2; i++) { - Bounds[i] = new GlobalVariable(M, Int32PtrTy, false, - GlobalVariable::ExternalLinkage, nullptr, - Prefix[i] + SectionName); - Bounds[i]->setVisibility(GlobalValue::HiddenVisibility); - } - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovTracePCGuardInitName, - {Int32PtrTy, Int32PtrTy}, - {IRB.CreatePointerCast(Bounds[0], Int32PtrTy), - IRB.CreatePointerCast(Bounds[1], Int32PtrTy)}); - - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); - } - } else if (!Options.TracePC) { - Function *CtorFunc; - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovModuleInitName, - {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy}, - {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, N), - Options.Use8bitCounters - ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy) - : Constant::getNullValue(Int8PtrTy), - IRB.CreatePointerCast(ModuleName, Int8PtrTy)}); - - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); - } + if (FunctionGuardArray) + CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy, + SanCovGuardsSectionName); + if (Function8bitCounterArray) + CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy, + SanCovCountersSectionName); return true; } @@ -425,8 +342,10 @@ static bool isFullPostDominator(const BasicBlock *BB, return true; } -static bool shouldInstrumentBlock(const Function& F, const BasicBlock *BB, const DominatorTree *DT, - const PostDominatorTree *PDT) { +static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, + const DominatorTree *DT, + const PostDominatorTree *PDT, + const SanitizerCoverageOptions &Options) { // Don't insert coverage for unreachable blocks: we will never call // __sanitizer_cov() for them, so counting them in // NumberOfInstrumentedBlocks() might complicate calculation of code coverage @@ -435,10 +354,18 @@ static bool shouldInstrumentBlock(const Function& F, const BasicBlock *BB, const if (isa<UnreachableInst>(BB->getTerminator())) return false; - if (!ClPruneBlocks || &F.getEntryBlock() == BB) + // Don't insert coverage into blocks without a valid insertion point + // (catchswitch blocks). + if (BB->getFirstInsertionPt() == BB->end()) + return false; + + if (Options.NoPrune || &F.getEntryBlock() == BB) return true; - return !(isFullDominator(BB, DT) || isFullPostDominator(BB, PDT)); + // Do not instrument full dominators, or full post-dominators with multiple + // predecessors. + return !isFullDominator(BB, DT) + && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor()); } bool SanitizerCoverageModule::runOnFunction(Function &F) { @@ -474,7 +401,7 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { &getAnalysis<PostDominatorTreeWrapperPass>(F).getPostDomTree(); for (auto &BB : F) { - if (shouldInstrumentBlock(F, &BB, DT, PDT)) + if (shouldInstrumentBlock(F, &BB, DT, PDT, Options)) BlocksToInstrument.push_back(&BB); for (auto &Inst : BB) { if (Options.IndirectCalls) { @@ -507,17 +434,26 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { InjectTraceForGep(F, GepTraceTargets); return true; } -void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards, - Function &F) { - if (!Options.TracePCGuard) return; - HasSancovGuardsSection = true; - ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards); - FunctionGuardArray = new GlobalVariable( - *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage, - Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_"); + +GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( + size_t NumElements, Function &F, Type *Ty, const char *Section) { + ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); + auto Array = new GlobalVariable( + *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, + Constant::getNullValue(ArrayTy), "__sancov_gen_"); if (auto Comdat = F.getComdat()) - FunctionGuardArray->setComdat(Comdat); - FunctionGuardArray->setSection(SanCovTracePCGuardSection); + Array->setComdat(Comdat); + Array->setSection(getSectionName(Section)); + return Array; +} +void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards, + Function &F) { + if (Options.TracePCGuard) + FunctionGuardArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int32Ty, SanCovGuardsSectionName); + if (Options.Inline8bitCounters) + Function8bitCounterArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int8Ty, SanCovCountersSectionName); } bool SanitizerCoverageModule::InjectCoverage(Function &F, @@ -527,14 +463,13 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, case SanitizerCoverageOptions::SCK_None: return false; case SanitizerCoverageOptions::SCK_Function: - CreateFunctionGuardArray(1, F); - InjectCoverageAtBlock(F, F.getEntryBlock(), 0, false); + CreateFunctionLocalArrays(1, F); + InjectCoverageAtBlock(F, F.getEntryBlock(), 0); return true; default: { - bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size(); - CreateFunctionGuardArray(AllBlocks.size(), F); + CreateFunctionLocalArrays(AllBlocks.size(), F); for (size_t i = 0, N = AllBlocks.size(); i < N; i++) - InjectCoverageAtBlock(F, *AllBlocks[i], i, UseCalls); + InjectCoverageAtBlock(F, *AllBlocks[i], i); return true; } } @@ -551,26 +486,14 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { if (IndirCalls.empty()) return; - const int CacheSize = 16; - const int CacheAlignment = 64; // Align for better performance. - Type *Ty = ArrayType::get(IntptrTy, CacheSize); + assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters); for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); Value *Callee = CS.getCalledValue(); if (isa<InlineAsm>(Callee)) continue; - GlobalVariable *CalleeCache = new GlobalVariable( - *F.getParent(), Ty, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); - CalleeCache->setAlignment(CacheAlignment); - if (Options.TracePC || Options.TracePCGuard) - IRB.CreateCall(SanCovTracePCIndir, - IRB.CreatePointerCast(Callee, IntptrTy)); - else - IRB.CreateCall(SanCovIndirCallFunction, - {IRB.CreatePointerCast(Callee, IntptrTy), - IRB.CreatePointerCast(CalleeCache, IntptrTy)}); + IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy)); } } @@ -670,13 +593,8 @@ void SanitizerCoverageModule::InjectTraceForCmp( } } -void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) { - I->setMetadata(I->getModule()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); -} - void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, - size_t Idx, bool UseCalls) { + size_t Idx) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; @@ -696,65 +614,51 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } else if (Options.TracePCGuard) { + } + if (Options.TracePCGuard) { auto GuardPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), ConstantInt::get(IntptrTy, Idx * 4)), Int32PtrTy); - if (!UseCalls) { - auto GuardLoad = IRB.CreateLoad(GuardPtr); - GuardLoad->setAtomic(AtomicOrdering::Monotonic); - GuardLoad->setAlignment(8); - SetNoSanitizeMetadata(GuardLoad); // Don't instrument with e.g. asan. - auto Cmp = IRB.CreateICmpNE( - GuardLoad, Constant::getNullValue(GuardLoad->getType())); - auto Ins = SplitBlockAndInsertIfThen( - Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); - IRB.SetInsertPoint(Ins); - IRB.SetCurrentDebugLocation(EntryLoc); - } IRB.CreateCall(SanCovTracePCGuard, GuardPtr); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } else { - Value *GuardP = IRB.CreateAdd( - IRB.CreatePointerCast(GuardArray, IntptrTy), - ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); - GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (Options.TraceBB) { - IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); - } else if (UseCalls) { - IRB.CreateCall(SanCovWithCheckFunction, GuardP); - } else { - LoadInst *Load = IRB.CreateLoad(GuardP); - Load->setAtomic(AtomicOrdering::Monotonic); - Load->setAlignment(4); - SetNoSanitizeMetadata(Load); - Value *Cmp = - IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); - Instruction *Ins = SplitBlockAndInsertIfThen( - Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); - IRB.SetInsertPoint(Ins); - IRB.SetCurrentDebugLocation(EntryLoc); - // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. - IRB.CreateCall(SanCovFunction, GuardP); - IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } } - - if (Options.Use8bitCounters) { - IRB.SetInsertPoint(&*IP); - Value *P = IRB.CreateAdd( - IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), - ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); - P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); - LoadInst *LI = IRB.CreateLoad(P); - Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); - StoreInst *SI = IRB.CreateStore(Inc, P); - SetNoSanitizeMetadata(LI); - SetNoSanitizeMetadata(SI); + if (Options.Inline8bitCounters) { + auto CounterPtr = IRB.CreateGEP( + Function8bitCounterArray, + {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); + auto Load = IRB.CreateLoad(CounterPtr); + auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1)); + auto Store = IRB.CreateStore(Inc, CounterPtr); + SetNoSanitizeMetadata(Load); + SetNoSanitizeMetadata(Store); } } +std::string +SanitizerCoverageModule::getSectionName(const std::string &Section) const { + if (TargetTriple.getObjectFormat() == Triple::COFF) + return ".SCOV$M"; + if (TargetTriple.isOSBinFormatMachO()) + return "__DATA,__" + Section; + return "__" + Section; +} + +std::string +SanitizerCoverageModule::getSectionStart(const std::string &Section) const { + if (TargetTriple.isOSBinFormatMachO()) + return "\1section$start$__DATA$__" + Section; + return "__start___" + Section; +} + +std::string +SanitizerCoverageModule::getSectionEnd(const std::string &Section) const { + if (TargetTriple.isOSBinFormatMachO()) + return "\1section$end$__DATA$__" + Section; + return "__stop___" + Section; +} + + char SanitizerCoverageModule::ID = 0; INITIALIZE_PASS_BEGIN(SanitizerCoverageModule, "sancov", "SanitizerCoverage: TODO." diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 52035c7..ec69044 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,7 +19,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -42,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/Local.h" @@ -155,17 +155,18 @@ FunctionPass *llvm::createThreadSanitizerPass() { void ThreadSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(M.getContext()); - AttributeSet Attr; - Attr = Attr.addAttribute(M.getContext(), AttributeSet::FunctionIndex, Attribute::NoUnwind); + AttributeList Attr; + Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex, + Attribute::NoUnwind); // Initialize the callbacks. TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy())); TsanFuncExit = checkSanitizerInterfaceFunction( - M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy(), nullptr)); + M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy())); TsanIgnoreBegin = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy(), nullptr)); + "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy())); TsanIgnoreEnd = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - "__tsan_ignore_thread_end", Attr, IRB.getVoidTy(), nullptr)); + "__tsan_ignore_thread_end", Attr, IRB.getVoidTy())); OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const unsigned ByteSize = 1U << i; @@ -174,31 +175,31 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { std::string BitSizeStr = utostr(BitSize); SmallString<32> ReadName("__tsan_read" + ByteSizeStr); TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy())); SmallString<32> WriteName("__tsan_write" + ByteSizeStr); TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy())); SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr); TsanUnalignedRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy())); SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr); TsanUnalignedWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy())); Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load"); TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy, nullptr)); + M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy)); SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store"); TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr)); + AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy)); for (int op = AtomicRMWInst::FIRST_BINOP; op <= AtomicRMWInst::LAST_BINOP; ++op) { @@ -222,33 +223,33 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { continue; SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy, nullptr)); + M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy)); } SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr + "_compare_exchange_val"); TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr)); + AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy)); } TsanVptrUpdate = checkSanitizerInterfaceFunction( M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(), - IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr)); + IRB.getInt8PtrTy(), IRB.getInt8PtrTy())); TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy())); TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy, nullptr)); + "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy)); TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy, nullptr)); + "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy)); MemmoveFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IntptrTy)); MemcpyFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); + IRB.getInt8PtrTy(), IntptrTy)); MemsetFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt32Ty(), IntptrTy, nullptr)); + IRB.getInt32Ty(), IntptrTy)); } bool ThreadSanitizer::doInitialization(Module &M) { @@ -271,7 +272,7 @@ static bool isVtableAccess(Instruction *I) { // Do not instrument known races/"benign races" that come from compiler // instrumentatin. The user has no way of suppressing them. -static bool shouldInstrumentReadWriteFromAddress(Value *Addr) { +static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { // Peel off GEPs and BitCasts. Addr = Addr->stripInBoundsOffsets(); @@ -279,8 +280,9 @@ static bool shouldInstrumentReadWriteFromAddress(Value *Addr) { if (GV->hasSection()) { StringRef SectionName = GV->getSection(); // Check if the global is in the PGO counters section. - if (SectionName.endswith(getInstrProfCountersSectionName( - /*AddSegment=*/false))) + auto OF = Triple(M->getTargetTriple()).getObjectFormat(); + if (SectionName.endswith( + getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false))) return false; } @@ -342,13 +344,13 @@ void ThreadSanitizer::chooseInstructionsToInstrument( for (Instruction *I : reverse(Local)) { if (StoreInst *Store = dyn_cast<StoreInst>(I)) { Value *Addr = Store->getPointerOperand(); - if (!shouldInstrumentReadWriteFromAddress(Addr)) + if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) continue; WriteTargets.insert(Addr); } else { LoadInst *Load = cast<LoadInst>(I); Value *Addr = Load->getPointerOperand(); - if (!shouldInstrumentReadWriteFromAddress(Addr)) + if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) continue; if (WriteTargets.count(Addr)) { // We will write to this temp, so no reason to analyze the read. @@ -377,10 +379,11 @@ void ThreadSanitizer::chooseInstructionsToInstrument( } static bool isAtomic(Instruction *I) { + // TODO: Ask TTI whether synchronization scope is between threads. if (LoadInst *LI = dyn_cast<LoadInst>(I)) - return LI->isAtomic() && LI->getSynchScope() == CrossThread; + return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread; if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return SI->isAtomic() && SI->getSynchScope() == CrossThread; + return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread; if (isa<AtomicRMWInst>(I)) return true; if (isa<AtomicCmpXchgInst>(I)) @@ -674,7 +677,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { I->eraseFromParent(); } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) { Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; - Function *F = FI->getSynchScope() == SingleThread ? + Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ? TsanAtomicSignalFence : TsanAtomicThreadFence; CallInst *C = CallInst::Create(F, Args); ReplaceInstWithInst(I, C); |