Diffstat (limited to 'contrib/llvm/lib/Transforms/Instrumentation')
12 files changed, 2234 insertions, 499 deletions
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index e7ef9f9..a9df5e5 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -90,7 +91,9 @@ static const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *const kAsanInitName = "__asan_init_v5"; +static const char *const kAsanInitName = "__asan_init"; +static const char *const kAsanVersionCheckName = + "__asan_version_mismatch_check_v6"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; static const char *const kAsanPtrSub = "__sanitizer_ptr_sub"; static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; @@ -119,6 +122,10 @@ static const unsigned kAllocaRzSize = 32; static cl::opt<bool> ClEnableKasan( "asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClRecover( + "asan-recover", + cl::desc("Enable recovery mode (continue-after-error)."), + cl::Hidden, cl::init(false)); // This flag may need to be replaced with -f[no-]asan-reads. static cl::opt<bool> ClInstrumentReads("asan-instrument-reads", @@ -177,7 +184,7 @@ static cl::opt<std::string> ClMemoryAccessCallbackPrefix( cl::init("__asan_")); static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas", cl::desc("instrument dynamic allocas"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); static cl::opt<bool> ClSkipPromotableAllocas( "asan-skip-promotable-allocas", cl::desc("Do not instrument promotable allocas"), cl::Hidden, @@ -273,6 +280,11 @@ class GlobalsMetadata { GlobalsMetadata() : inited_(false) {} + void reset() { + inited_ = false; + Entries.clear(); + } + void init(Module &M) { assert(!inited_); inited_ = true; @@ -321,7 +333,7 @@ struct ShadowMapping { static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, bool IsKasan) { - bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android; + bool IsAndroid = TargetTriple.isAndroid(); bool IsIOS = TargetTriple.isiOS(); bool IsFreeBSD = TargetTriple.isOSFreeBSD(); bool IsLinux = TargetTriple.isOSLinux(); @@ -338,6 +350,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, ShadowMapping Mapping; if (LongSize == 32) { + // Android is always PIE, which means that the beginning of the address + // space is always available. if (IsAndroid) Mapping.Offset = 0; else if (IsMIPS32) @@ -376,7 +390,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, // OR-ing shadow offset if more efficient (at least on x86) if the offset // is a power of two, but on ppc64 we have to use add since the shadow // offset is not necessary 1/8-th of the address space. 
- Mapping.OrShadowOffset = !IsPPC64 && !(Mapping.Offset & (Mapping.Offset - 1)); + Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 + && !(Mapping.Offset & (Mapping.Offset - 1)); return Mapping; } @@ -389,8 +404,9 @@ static size_t RedzoneSizeForScale(int MappingScale) { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - explicit AddressSanitizer(bool CompileKernel = false) - : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan) { + explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false) + : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan), + Recover(Recover || ClRecover) { initializeAddressSanitizerPass(*PassRegistry::getPassRegistry()); } const char *getPassName() const override { @@ -437,7 +453,9 @@ struct AddressSanitizer : public FunctionPass { Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F) override; bool maybeInsertAsanInitAtFunctionEntry(Function &F); + void markEscapedLocalAllocas(Function &F); bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; static char ID; // Pass identification, replacement for typeid DominatorTree &getDominatorTree() const { return *DT; } @@ -450,10 +468,21 @@ struct AddressSanitizer : public FunctionPass { bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr, uint64_t TypeSize) const; + /// Helper to cleanup per-function state. + struct FunctionStateRAII { + AddressSanitizer *Pass; + FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) { + assert(Pass->ProcessedAllocas.empty() && + "last pass forgot to clear cache"); + } + ~FunctionStateRAII() { Pass->ProcessedAllocas.clear(); } + }; + LLVMContext *C; Triple TargetTriple; int LongSize; bool CompileKernel; + bool Recover; Type *IntptrTy; ShadowMapping Mapping; DominatorTree *DT; @@ -477,8 +506,10 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - explicit AddressSanitizerModule(bool CompileKernel = false) - : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan) {} + explicit AddressSanitizerModule(bool CompileKernel = false, + bool Recover = false) + : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), + Recover(Recover || ClRecover) {} bool runOnModule(Module &M) override; static char ID; // Pass identification, replacement for typeid const char *getPassName() const override { return "AddressSanitizerModule"; } @@ -496,6 +527,7 @@ class AddressSanitizerModule : public ModulePass { GlobalsMetadata GlobalsMD; bool CompileKernel; + bool Recover; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; @@ -525,6 +557,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { ShadowMapping Mapping; SmallVector<AllocaInst *, 16> AllocaVec; + SmallSetVector<AllocaInst *, 16> NonInstrumentedStaticAllocaVec; SmallVector<Instruction *, 8> RetVec; unsigned StackAlignment; @@ -545,12 +578,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { SmallVector<AllocaInst *, 1> DynamicAllocaVec; SmallVector<IntrinsicInst *, 1> StackRestoreVec; AllocaInst *DynamicAllocaLayout = nullptr; + IntrinsicInst *LocalEscapeCall = nullptr; // Maps Value to an AllocaInst from which the Value is originated. 
typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy; AllocaForValueMapTy AllocaForValue; - bool HasNonEmptyInlineAsm; + bool HasNonEmptyInlineAsm = false; + bool HasReturnsTwiceCall = false; std::unique_ptr<CallInst> EmptyInlineAsm; FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) @@ -562,7 +597,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), StackAlignment(1 << Mapping.Scale), - HasNonEmptyInlineAsm(false), EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {} bool runOnFunction() { @@ -596,9 +630,24 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore, Value *SavedStack) { IRBuilder<> IRB(InstBefore); + Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy); + // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we + // need to adjust extracted SP to compute the address of the most recent + // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for + // this purpose. + if (!isa<ReturnInst>(InstBefore)) { + Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration( + InstBefore->getModule(), Intrinsic::get_dynamic_area_offset, + {IntptrTy}); + + Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {}); + + DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy), + DynamicAreaOffset); + } + IRB.CreateCall(AsanAllocasUnpoisonFunc, - {IRB.CreateLoad(DynamicAllocaLayout), - IRB.CreatePtrToInt(SavedStack, IntptrTy)}); + {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr}); } // Unpoison dynamic allocas redzones. @@ -625,7 +674,10 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { /// \brief Collect Alloca instructions we want (and can) handle. void visitAllocaInst(AllocaInst &AI) { - if (!ASan.isInterestingAlloca(AI)) return; + if (!ASan.isInterestingAlloca(AI)) { + if (AI.isStaticAlloca()) NonInstrumentedStaticAllocaVec.insert(&AI); + return; + } StackAlignment = std::max(StackAlignment, AI.getAlignment()); if (ASan.isDynamicAlloca(AI)) @@ -639,6 +691,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { void visitIntrinsicInst(IntrinsicInst &II) { Intrinsic::ID ID = II.getIntrinsicID(); if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II); + if (ID == Intrinsic::localescape) LocalEscapeCall = &II; if (!ClCheckLifetime) return; if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end) return; @@ -660,9 +713,13 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { AllocaPoisonCallVec.push_back(APC); } - void visitCallInst(CallInst &CI) { - HasNonEmptyInlineAsm |= - CI.isInlineAsm() && !CI.isIdenticalTo(EmptyInlineAsm.get()); + void visitCallSite(CallSite CS) { + Instruction *I = CS.getInstruction(); + if (CallInst *CI = dyn_cast<CallInst>(I)) { + HasNonEmptyInlineAsm |= + CI->isInlineAsm() && !CI->isIdenticalTo(EmptyInlineAsm.get()); + HasReturnsTwiceCall |= CI->canReturnTwice(); + } } // ---------------------- Helpers. 
@@ -689,7 +746,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { Instruction *ThenTerm, Value *ValueIfFalse); }; -} // namespace +} // anonymous namespace char AddressSanitizer::ID = 0; INITIALIZE_PASS_BEGIN( @@ -697,12 +754,15 @@ INITIALIZE_PASS_BEGIN( "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) -FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel) { - return new AddressSanitizer(CompileKernel); +FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel, + bool Recover) { + assert(!CompileKernel || Recover); + return new AddressSanitizer(CompileKernel, Recover); } char AddressSanitizerModule::ID = 0; @@ -711,8 +771,10 @@ INITIALIZE_PASS( "AddressSanitizer: detects use-after-free and out-of-bounds bugs." "ModulePass", false, false) -ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel) { - return new AddressSanitizerModule(CompileKernel); +ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel, + bool Recover) { + assert(!CompileKernel || Recover); + return new AddressSanitizerModule(CompileKernel, Recover); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -799,8 +861,10 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) { getAllocaSizeInBytes(&AI) > 0 && // We are only interested in allocas not promotable to registers. // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI) || - isDynamicAlloca(AI))); + (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && + // inalloca allocas are not treated as static, and we don't want + // dynamic alloca instrumentation for them as well. + !AI.isUsedWithInAlloca()); ProcessedAllocas[&AI] = IsInteresting; return IsInteresting; @@ -868,10 +932,8 @@ static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) { } else { return false; } - if (!isPointerOperand(I->getOperand(0)) || - !isPointerOperand(I->getOperand(1))) - return false; - return true; + return isPointerOperand(I->getOperand(0)) && + isPointerOperand(I->getOperand(1)); } bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { @@ -919,7 +981,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, // If initialization order checking is disabled, a simple access to a // dynamically initialized global is always valid. 
GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL)); - if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) && + if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) && isSafeAccess(ObjSizeVis, Addr, TypeSize)) { NumOptimizedAccessesToGlobalVar++; return; @@ -1041,13 +1103,17 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); - BasicBlock *CrashBlock = + if (Recover) { + CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false); + } else { + BasicBlock *CrashBlock = BasicBlock::Create(*C, "", NextBB->getParent(), NextBB); - CrashTerm = new UnreachableInst(*C, CrashBlock); - BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); - ReplaceInstWithInst(CheckTerm, NewTerm); + CrashTerm = new UnreachableInst(*C, CrashBlock); + BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); + ReplaceInstWithInst(CheckTerm, NewTerm); + } } else { - CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true); + CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover); } Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, @@ -1084,7 +1150,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment( void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName) { // Set up the arguments to our poison/unpoison functions. - IRBuilder<> IRB(GlobalInit.begin()->getFirstInsertionPt()); + IRBuilder<> IRB(&GlobalInit.front(), + GlobalInit.front().getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); @@ -1147,6 +1214,14 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // Do not instrument globals from special LLVM sections. if (Section.find("__llvm") != StringRef::npos) return false; + // Do not instrument function pointers to initialization and termination + // routines: dynamic linker will not properly handle redzones. + if (Section.startswith(".preinit_array") || + Section.startswith(".init_array") || + Section.startswith(".fini_array")) { + return false; + } + // Callbacks put into the CRT initializer/terminator sections // should not be instrumented. // See https://code.google.com/p/address-sanitizer/issues/detail?id=305 @@ -1162,10 +1237,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { bool TAAParsed; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier( Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize); - if (!ErrorCode.empty()) { - assert(false && "Invalid section specifier."); - return false; - } + assert(ErrorCode.empty() && "Invalid section specifier."); // Ignore the globals from the __OBJC section. The ObjC runtime assumes // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to @@ -1383,13 +1455,11 @@ void AddressSanitizer::initializeCallbacks(Module &M) { const std::string TypeStr = AccessIsWrite ? "store" : "load"; const std::string ExpStr = Exp ? "exp_" : ""; const std::string SuffixStr = CompileKernel ? "N" : "_n"; - const std::string EndingStr = CompileKernel ? "_noabort" : ""; - const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; - // TODO(glider): for KASan builds add _noabort to error reporting - // functions and make them actually noabort (remove the UnreachableInst). 
+ const std::string EndingStr = Recover ? "_noabort" : ""; + Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; AsanErrorCallbackSized[AccessIsWrite][Exp] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr, + kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + EndingStr, IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( @@ -1400,7 +1470,7 @@ void AddressSanitizer::initializeCallbacks(Module &M) { const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex); AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - kAsanReportErrorTemplate + ExpStr + Suffix, + kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( @@ -1448,15 +1518,20 @@ bool AddressSanitizer::doInitialization(Module &M) { if (!CompileKernel) { std::tie(AsanCtorFunction, AsanInitFunction) = - createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName, - /*InitArgTypes=*/{}, - /*InitArgs=*/{}); + createSanitizerCtorAndInitFunctions( + M, kAsanModuleCtorName, kAsanInitName, + /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName); appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); } Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel); return true; } +bool AddressSanitizer::doFinalization(Module &M) { + GlobalsMD.reset(); + return false; +} + bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. @@ -1466,13 +1541,41 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // We cannot just ignore these methods, because they may call other // instrumented functions. if (F.getName().find(" load]") != std::string::npos) { - IRBuilder<> IRB(F.begin()->begin()); + IRBuilder<> IRB(&F.front(), F.front().begin()); IRB.CreateCall(AsanInitFunction, {}); return true; } return false; } +void AddressSanitizer::markEscapedLocalAllocas(Function &F) { + // Find the one possible call to llvm.localescape and pre-mark allocas passed + // to it as uninteresting. This assumes we haven't started processing allocas + // yet. This check is done up front because iterating the use list in + // isInterestingAlloca would be algorithmically slower. + assert(ProcessedAllocas.empty() && "must process localescape before allocas"); + + // Try to get the declaration of llvm.localescape. If it's not in the module, + // we can exit early. + if (!F.getParent()->getFunction("llvm.localescape")) return; + + // Look for a call to llvm.localescape call in the entry block. It can't be in + // any other block. + for (Instruction &I : F.getEntryBlock()) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); + if (II && II->getIntrinsicID() == Intrinsic::localescape) { + // We found a call. Mark all the allocas passed in as uninteresting. 
+ for (Value *Arg : II->arg_operands()) { + AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts()); + assert(AI && AI->isStaticAlloca() && + "non-static alloca arg to localescape"); + ProcessedAllocas[AI] = false; + } + break; + } + } +} + bool AddressSanitizer::runOnFunction(Function &F) { if (&F == AsanCtorFunction) return false; if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; @@ -1488,6 +1591,12 @@ bool AddressSanitizer::runOnFunction(Function &F) { if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; + FunctionStateRAII CleanupObj(this); + + // We can't instrument allocas used with llvm.localescape. Only static allocas + // can be passed to that intrinsic. + markEscapedLocalAllocas(F); + // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallSet<Value *, 16> TempsToInstrument; @@ -1715,6 +1824,16 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() { void FunctionStackPoisoner::poisonStack() { assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0); + // Insert poison calls for lifetime intrinsics for alloca. + bool HavePoisonedAllocas = false; + for (const auto &APC : AllocaPoisonCallVec) { + assert(APC.InsBefore); + assert(APC.AI); + IRBuilder<> IRB(APC.InsBefore); + poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison); + HavePoisonedAllocas |= APC.DoPoison; + } + if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) { // Handle dynamic allocas. createDynamicAllocasInitStorage(); @@ -1723,7 +1842,7 @@ void FunctionStackPoisoner::poisonStack() { unpoisonDynamicAllocas(); } - if (AllocaVec.size() == 0) return; + if (AllocaVec.empty()) return; int StackMallocIdx = -1; DebugLoc EntryDebugLocation; @@ -1734,6 +1853,19 @@ void FunctionStackPoisoner::poisonStack() { IRBuilder<> IRB(InsBefore); IRB.SetCurrentDebugLocation(EntryDebugLocation); + // Make sure non-instrumented allocas stay in the entry block. Otherwise, + // debug info is broken, because only entry-block allocas are treated as + // regular stack slots. + auto InsBeforeB = InsBefore->getParent(); + assert(InsBeforeB == &F.getEntryBlock()); + for (BasicBlock::iterator I(InsBefore); I != InsBeforeB->end(); ++I) + if (auto *AI = dyn_cast<AllocaInst>(I)) + if (NonInstrumentedStaticAllocaVec.count(AI) > 0) + AI->moveBefore(InsBefore); + + // If we have a call to llvm.localescape, keep it in the entry block. + if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore); + SmallVector<ASanStackVariableDescription, 16> SVD; SVD.reserve(AllocaVec.size()); for (AllocaInst *AI : AllocaVec) { @@ -1751,10 +1883,15 @@ void FunctionStackPoisoner::poisonStack() { uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel && LocalStackSize <= kMaxStackMallocSize; - // Don't do dynamic alloca or stack malloc in presence of inline asm: - // too often it makes assumptions on which registers are available. - bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm; - DoStackMalloc &= !HasNonEmptyInlineAsm; + bool DoDynamicAlloca = ClDynamicAllocaStack; + // Don't do dynamic alloca or stack malloc if: + // 1) There is inline asm: too often it makes assumptions on which registers + // are available. + // 2) There is a returns_twice call (typically setjmp), which is + // optimization-hostile, and doesn't play well with introduced indirect + // register-relative calculation of local variable addresses. 
+ DoDynamicAlloca &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall; + DoStackMalloc &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall; Value *StaticAlloca = DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); @@ -1804,16 +1941,6 @@ void FunctionStackPoisoner::poisonStack() { DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca; } - // Insert poison calls for lifetime intrinsics for alloca. - bool HavePoisonedAllocas = false; - for (const auto &APC : AllocaPoisonCallVec) { - assert(APC.InsBefore); - assert(APC.AI); - IRBuilder<> IRB(APC.InsBefore); - poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison); - HavePoisonedAllocas |= APC.DoPoison; - } - // Replace Alloca instructions with base+offset. for (const auto &Desc : SVD) { AllocaInst *AI = Desc.AI; diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index f685803..fd3dfd9 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -106,7 +106,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) { } ++ChecksAdded; - Instruction *Inst = Builder->GetInsertPoint(); + BasicBlock::iterator Inst = Builder->GetInsertPoint(); BasicBlock *OldBB = Inst->getParent(); BasicBlock *Cont = OldBB->splitBasicBlock(Inst); OldBB->getTerminator()->eraseFromParent(); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h new file mode 100644 index 0000000..c47fdbf --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h @@ -0,0 +1,217 @@ +//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a Union-find algorithm to compute Minimum Spanning Tree +// for a given CFG. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <string> +#include <utility> +#include <vector> + +namespace llvm { + +#define DEBUG_TYPE "cfgmst" + +/// \brief An union-find based Minimum Spanning Tree for CFG +/// +/// Implements a Union-find algorithm to compute Minimum Spanning Tree +/// for a given CFG. +template <class Edge, class BBInfo> class CFGMST { +public: + Function &F; + + // Store all the edges in CFG. It may contain some stale edges + // when Removed is set. + std::vector<std::unique_ptr<Edge>> AllEdges; + + // This map records the auxiliary information for each BB. + DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos; + + // Find the root group of the G and compress the path from G to the root. + BBInfo *findAndCompressGroup(BBInfo *G) { + if (G->Group != G) + G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group)); + return static_cast<BBInfo *>(G->Group); + } + + // Union BB1 and BB2 into the same group and return true. 
+ // Returns false if BB1 and BB2 are already in the same group. + bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) { + BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1)); + BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2)); + + if (BB1G == BB2G) + return false; + + // Make the smaller rank tree a direct child of the root of the high rank tree. + if (BB1G->Rank < BB2G->Rank) + BB1G->Group = BB2G; + else { + BB2G->Group = BB1G; + // If the ranks are the same, increment the root of one tree by one. + if (BB1G->Rank == BB2G->Rank) + BB1G->Rank++; + } + return true; + } + + // Given a BB, return the auxiliary information. + BBInfo &getBBInfo(const BasicBlock *BB) const { + auto It = BBInfos.find(BB); + assert(It->second.get() != nullptr); + return *It->second.get(); + } + + // Traverse the CFG, find all the edges, and assign each a weight. + // Edges with large weights are put into the MST first, so they are less likely + // to be instrumented. + void buildEdges() { + DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); + + const BasicBlock *BB = &(F.getEntryBlock()); + uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); + // Add a fake edge to the entry. + addEdge(nullptr, BB, EntryWeight); + + // Special handling for single BB functions. + if (succ_empty(BB)) { + addEdge(BB, nullptr, EntryWeight); + return; + } + + static const uint32_t CriticalEdgeMultiplier = 1000; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + uint64_t BBWeight = + (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2); + uint64_t Weight = 2; + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + BasicBlock *TargetBB = TI->getSuccessor(i); + bool Critical = isCriticalEdge(TI, i); + uint64_t scaleFactor = BBWeight; + if (Critical) { + if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) + scaleFactor *= CriticalEdgeMultiplier; + else + scaleFactor = UINT64_MAX; + } + if (BPI != nullptr) + Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); + addEdge(&*BB, TargetBB, Weight).IsCritical = Critical; + DEBUG(dbgs() << " Edge: from " << BB->getName() << " to " + << TargetBB->getName() << " w=" << Weight << "\n"); + } + } else { + addEdge(&*BB, nullptr, BBWeight); + DEBUG(dbgs() << " Edge: from " << BB->getName() << " to exit" + << " w = " << BBWeight << "\n"); + } + } + } + + // Sort CFG edges based on their weights. + void sortEdgesByWeight() { + std::stable_sort(AllEdges.begin(), AllEdges.end(), + [](const std::unique_ptr<Edge> &Edge1, + const std::unique_ptr<Edge> &Edge2) { + return Edge1->Weight > Edge2->Weight; + }); + } + + // Traverse all the edges and compute the Minimum Weight Spanning Tree + // using the union-find algorithm. + void computeMinimumSpanningTree() { + // First, put all the critical edges with a landing-pad Dest into the MST. + // This works around the insufficient support for splitting critical edges + // whose destination BB is a landing pad. + for (auto &Ei : AllEdges) { + if (Ei->Removed) + continue; + if (Ei->IsCritical) { + if (Ei->DestBB && Ei->DestBB->isLandingPad()) { + if (unionGroups(Ei->SrcBB, Ei->DestBB)) + Ei->InMST = true; + } + } + } + + for (auto &Ei : AllEdges) { + if (Ei->Removed) + continue; + if (unionGroups(Ei->SrcBB, Ei->DestBB)) + Ei->InMST = true; + } + } + + // Dump the debug information about the instrumentation.
+ void dumpEdges(raw_ostream &OS, const Twine &Message) const { + if (!Message.str().empty()) + OS << Message << "\n"; + OS << " Number of Basic Blocks: " << BBInfos.size() << "\n"; + for (auto &BI : BBInfos) { + const BasicBlock *BB = BI.first; + OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " " + << BI.second->infoString() << "\n"; + } + + OS << " Number of Edges: " << AllEdges.size() + << " (*: Instrument, C: CriticalEdge, -: Removed)\n"; + uint32_t Count = 0; + for (auto &EI : AllEdges) + OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->" + << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n"; + } + + // Add an edge to AllEdges with weight W. + Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) { + uint32_t Index = BBInfos.size(); + auto Iter = BBInfos.end(); + bool Inserted; + std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); + if (Inserted) { + // Newly inserted, update the real info. + Iter->second = std::move(llvm::make_unique<BBInfo>(Index)); + Index++; + } + std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); + if (Inserted) + // Newly inserted, update the real info. + Iter->second = std::move(llvm::make_unique<BBInfo>(Index)); + AllEdges.emplace_back(new Edge(Src, Dest, W)); + return *AllEdges.back(); + } + + BranchProbabilityInfo *BPI; + BlockFrequencyInfo *BFI; + +public: + CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr, + BlockFrequencyInfo *BFI_ = nullptr) + : F(Func), BPI(BPI_), BFI(BFI_) { + buildEdges(); + sortEdgesByWeight(); + computeMinimumSpanningTree(); + } +}; + +#undef DEBUG_TYPE // "cfgmst" +} // end namespace llvm diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 2de6e1a..d459fc5 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -72,6 +72,11 @@ using namespace llvm; +// External symbol to be used when generating the shadow address for +// architectures with multiple VMAs. Instead of using a constant integer +// the runtime will set the external mask based on the VMA range. +static const char *const kDFSanExternShadowPtrMask = "__dfsan_shadow_ptr_mask"; + // The -dfsan-preserve-alignment flag controls whether this pass assumes that // alignment requirements provided by the input IR are correct. 
For example, // if the input IR contains a load with alignment 8, this flag will cause @@ -124,6 +129,7 @@ static cl::opt<bool> ClDebugNonzeroLabels( "load or return with a nonzero label"), cl::Hidden); + namespace { StringRef GetGlobalTypeString(const GlobalValue &G) { @@ -231,6 +237,7 @@ class DataFlowSanitizer : public ModulePass { void *(*GetRetvalTLSPtr)(); Constant *GetArgTLS; Constant *GetRetvalTLS; + Constant *ExternalShadowMask; FunctionType *DFSanUnionFnTy; FunctionType *DFSanUnionLoadFnTy; FunctionType *DFSanUnimplementedFnTy; @@ -248,7 +255,7 @@ class DataFlowSanitizer : public ModulePass { DFSanABIList ABIList; DenseMap<Value *, Function *> UnwrappedFnMap; AttributeSet ReadOnlyNoneAttrs; - DenseMap<const Function *, DISubprogram *> FunctionDIs; + bool DFSanRuntimeShadowMask; Value *getShadowAddress(Value *Addr, Instruction *Pos); bool isInstrumented(const Function *F); @@ -362,7 +369,8 @@ llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles, DataFlowSanitizer::DataFlowSanitizer( const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(), void *(*getRetValTLS)()) - : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) { + : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), + DFSanRuntimeShadowMask(false) { std::vector<std::string> AllABIListFiles(std::move(ABIListFiles)); AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(), ClABIListFiles.end()); @@ -420,6 +428,8 @@ bool DataFlowSanitizer::doInitialization(Module &M) { bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || TargetTriple.getArch() == llvm::Triple::mips64el; + bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64 || + TargetTriple.getArch() == llvm::Triple::aarch64_be; const DataLayout &DL = M.getDataLayout(); @@ -434,6 +444,9 @@ bool DataFlowSanitizer::doInitialization(Module &M) { ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); else if (IsMIPS64) ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL); + // AArch64 supports multiple VMAs and the shadow mask is set at runtime. 
+ else if (IsAArch64) + DFSanRuntimeShadowMask = true; else report_fatal_error("unsupported triple"); @@ -578,7 +591,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) - DFSF.ValShadowMap[ValAI] = ShadowAI; + DFSF.ValShadowMap[&*ValAI] = &*ShadowAI; DFSanVisitor(DFSF).visitCallInst(*CI); if (!FT->getReturnType()->isVoidTy()) new StoreInst(DFSF.getShadow(RI->getReturnValue()), @@ -592,8 +605,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (ABIList.isIn(M, "skip")) return false; - FunctionDIs = makeSubprogramMap(M); - if (!GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); @@ -606,6 +617,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) { G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); } + ExternalShadowMask = + Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); + DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy); if (Function *F = dyn_cast<Function>(DFSanUnionFn)) { F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); @@ -643,16 +657,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) { std::vector<Function *> FnsToInstrument; llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI; - for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) { - if (!i->isIntrinsic() && - i != DFSanUnionFn && - i != DFSanCheckedUnionFn && - i != DFSanUnionLoadFn && - i != DFSanUnimplementedFn && - i != DFSanSetLabelFn && - i != DFSanNonzeroLabelFn && - i != DFSanVarargWrapperFn) - FnsToInstrument.push_back(&*i); + for (Function &i : M) { + if (!i.isIntrinsic() && + &i != DFSanUnionFn && + &i != DFSanCheckedUnionFn && + &i != DFSanUnionLoadFn && + &i != DFSanUnimplementedFn && + &i != DFSanSetLabelFn && + &i != DFSanNonzeroLabelFn && + &i != DFSanVarargWrapperFn) + FnsToInstrument.push_back(&i); } // Give function aliases prefixes when necessary, and build wrappers where the @@ -710,7 +724,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); FArg != FArgEnd; ++FArg, ++NewFArg) { - FArg->replaceAllUsesWith(NewFArg); + FArg->replaceAllUsesWith(&*NewFArg); } NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); @@ -750,11 +764,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) { ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); F.replaceAllUsesWith(WrappedFnCst); - // Patch the pointer to LLVM function in debug info descriptor. 
- auto DI = FunctionDIs.find(&F); - if (DI != FunctionDIs.end()) - DI->second->replaceFunction(&F); - UnwrappedFnMap[WrappedFnCst] = &F; *i = NewF; @@ -842,7 +851,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (Instruction *I = dyn_cast<Instruction>(V)) Pos = I->getNextNode(); else - Pos = DFSF.F->getEntryBlock().begin(); + Pos = &DFSF.F->getEntryBlock().front(); while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos)) Pos = Pos->getNextNode(); IRBuilder<> IRB(Pos); @@ -864,7 +873,7 @@ Value *DFSanFunction::getArgTLSPtr() { if (DFS.ArgTLS) return ArgTLSPtr = DFS.ArgTLS; - IRBuilder<> IRB(F->getEntryBlock().begin()); + IRBuilder<> IRB(&F->getEntryBlock().front()); return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {}); } @@ -874,7 +883,7 @@ Value *DFSanFunction::getRetvalTLS() { if (DFS.RetvalTLS) return RetvalTLSPtr = DFS.RetvalTLS; - IRBuilder<> IRB(F->getEntryBlock().begin()); + IRBuilder<> IRB(&F->getEntryBlock().front()); return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {}); } @@ -906,7 +915,7 @@ Value *DFSanFunction::getShadow(Value *V) { Function::arg_iterator i = F->arg_begin(); while (ArgIdx--) ++i; - Shadow = i; + Shadow = &*i; assert(Shadow->getType() == DFS.ShadowTy); break; } @@ -928,9 +937,15 @@ void DFSanFunction::setShadow(Instruction *I, Value *Shadow) { Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) { assert(Addr != RetvalTLS && "Reinstrumenting?"); IRBuilder<> IRB(Pos); + Value *ShadowPtrMaskValue; + if (DFSanRuntimeShadowMask) + ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask); + else + ShadowPtrMaskValue = ShadowPtrMask; return IRB.CreateIntToPtr( IRB.CreateMul( - IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask), + IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), + IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)), ShadowPtrMul), ShadowPtrTy); } @@ -991,7 +1006,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { Call->addAttribute(2, Attribute::ZExt); BasicBlock *Tail = BI->getSuccessor(0); - PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", Tail->begin()); + PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); Phi->addIncoming(Call, Call->getParent()); Phi->addIncoming(V1, Head); @@ -1105,7 +1120,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow); BasicBlock *Head = Pos->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(Pos); + BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator()); if (DomTreeNode *OldNode = DT.getNode(Head)) { std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); @@ -1475,8 +1490,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (FT->isVarArg()) { auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, CS.arg_size() - FT->getNumParams()); - auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva", - DFSF.F->getEntryBlock().begin()); + auto *LabelVAAlloca = new AllocaInst( + LabelVATy, "labelva", &DFSF.F->getEntryBlock().front()); for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); @@ -1490,7 +1505,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (!DFSF.LabelReturnAlloca) { DFSF.LabelReturnAlloca = new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn", - DFSF.F->getEntryBlock().begin()); + &DFSF.F->getEntryBlock().front()); } Args.push_back(DFSF.LabelReturnAlloca); } @@ -1529,13 +1544,14 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if 
(!CS.getType()->isVoidTy()) { if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { if (II->getNormalDest()->getSinglePredecessor()) { - Next = II->getNormalDest()->begin(); + Next = &II->getNormalDest()->front(); } else { BasicBlock *NewBB = SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT); - Next = NewBB->begin(); + Next = &NewBB->front(); } } else { + assert(CS->getIterator() != CS->getParent()->end()); Next = CS->getNextNode(); } @@ -1568,7 +1584,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { unsigned VarArgSize = CS.arg_size() - FT->getNumParams(); ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); AllocaInst *VarArgShadow = - new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); + new AllocaInst(VarArgArrayTy, "", &DFSF.F->getEntryBlock().front()); Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); for (unsigned n = 0; i != e; ++i, ++n) { IRB.CreateStore( diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 9a3ed5c..fa939ae 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -138,6 +138,7 @@ namespace { Module *M; LLVMContext *Ctx; SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs; + DenseMap<DISubprogram *, Function *> FnMap; }; } @@ -309,13 +310,12 @@ namespace { // object users can construct, the blocks and lines will be rooted here. class GCOVFunction : public GCOVRecord { public: - GCOVFunction(const DISubprogram *SP, raw_ostream *os, uint32_t Ident, - bool UseCfgChecksum, bool ExitBlockBeforeBody) + GCOVFunction(const DISubprogram *SP, Function *F, raw_ostream *os, + uint32_t Ident, bool UseCfgChecksum, bool ExitBlockBeforeBody) : SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0), ReturnBlock(1, os) { this->os = os; - Function *F = SP->getFunction(); DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); uint32_t i = 0; @@ -347,8 +347,8 @@ namespace { std::string EdgeDestinations; raw_string_ostream EDOS(EdgeDestinations); Function *F = Blocks.begin()->first->getParent(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - GCOVBlock &Block = getBlock(I); + for (BasicBlock &I : *F) { + GCOVBlock &Block = getBlock(&I); for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) EDOS << Block.OutEdges[i]->Number; } @@ -389,8 +389,8 @@ namespace { // Emit edges between blocks. if (Blocks.empty()) return; Function *F = Blocks.begin()->first->getParent(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - GCOVBlock &Block = getBlock(I); + for (BasicBlock &I : *F) { + GCOVBlock &Block = getBlock(&I); if (Block.OutEdges.empty()) continue; writeBytes(EdgeTag, 4); @@ -405,9 +405,8 @@ namespace { } // Emit lines for each block. 
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - getBlock(I).writeOut(); - } + for (BasicBlock &I : *F) + getBlock(&I).writeOut(); } private: @@ -451,6 +450,12 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); + FnMap.clear(); + for (Function &F : M) { + if (DISubprogram *SP = F.getSubprogram()) + FnMap[SP] = &F; + } + if (Options.EmitNotes) emitProfileNotes(); if (Options.EmitData) return emitProfileArcs(); return false; @@ -495,7 +500,7 @@ void GCOVProfiler::emitProfileNotes() { unsigned FunctionIdent = 0; for (auto *SP : CU->getSubprograms()) { - Function *F = SP->getFunction(); + Function *F = FnMap[SP]; if (!F) continue; if (!functionHasLines(F)) continue; @@ -507,13 +512,13 @@ void GCOVProfiler::emitProfileNotes() { ++It; EntryBlock.splitBasicBlock(It); - Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++, + Funcs.push_back(make_unique<GCOVFunction>(SP, F, &out, FunctionIdent++, Options.UseCfgChecksum, Options.ExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); + GCOVBlock &Block = Func.getBlock(&*BB); TerminatorInst *TI = BB->getTerminator(); if (int successors = TI->getNumSuccessors()) { for (int i = 0; i != successors; ++i) { @@ -574,7 +579,7 @@ bool GCOVProfiler::emitProfileArcs() { auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i)); SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; for (auto *SP : CU->getSubprograms()) { - Function *F = SP->getFunction(); + Function *F = FnMap[SP]; if (!F) continue; if (!functionHasLines(F)) continue; if (!Result) Result = true; @@ -605,7 +610,7 @@ bool GCOVProfiler::emitProfileArcs() { int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors(); if (Successors) { if (Successors == 1) { - IRBuilder<> Builder(BB->getFirstInsertionPt()); + IRBuilder<> Builder(&*BB->getFirstInsertionPt()); Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge); Value *Count = Builder.CreateLoad(Counter); @@ -625,7 +630,7 @@ bool GCOVProfiler::emitProfileArcs() { Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else { - ComplexEdgePreds.insert(BB); + ComplexEdgePreds.insert(&*BB); for (int i = 0; i != Successors; ++i) ComplexEdgeSuccs.insert(TI->getSuccessor(i)); } @@ -641,13 +646,13 @@ bool GCOVProfiler::emitProfileArcs() { GlobalVariable *EdgeState = getEdgeStateValue(); for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { - IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt()); + IRBuilder<> Builder(&*ComplexEdgePreds[i + 1]->getFirstInsertionPt()); Builder.CreateStore(Builder.getInt32(i), EdgeState); } for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { // Call runtime to perform increment. 
- IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt()); + IRBuilder<> Builder(&*ComplexEdgeSuccs[i + 1]->getFirstInsertionPt()); Value *CounterPtrArray = Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, i * ComplexEdgePreds.size()); @@ -731,8 +736,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( IRBuilder<> Builder(Succ); Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge + i); - EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + - (Preds.idFor(BB)-1)] = cast<Constant>(Counter); + EdgeTable[((Succs.idFor(Succ) - 1) * Preds.size()) + + (Preds.idFor(&*BB) - 1)] = cast<Constant>(Counter); } } Edge += Successors; @@ -901,7 +906,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { // uint32_t pred = *predecessor; // if (pred == 0xffffffff) return; - Argument *Arg = Fn->arg_begin(); + Argument *Arg = &*Fn->arg_begin(); Arg->setName("predecessor"); Value *Pred = Builder.CreateLoad(Arg, "pred"); Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff)); @@ -912,7 +917,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { // uint64_t *counter = counters[pred]; // if (!counter) return; Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty()); - Arg = std::next(Fn->arg_begin()); + Arg = &*std::next(Fn->arg_begin()); Arg->setName("counters"); Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred); Value *Counter = Builder.CreateLoad(GEP, "counter"); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 712bf8e..92e41ee 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// // -// This pass lowers instrprof_increment intrinsics emitted by a frontend for -// profiling. It also builds the data structures and initialization code needed -// for updating execution counts and emitting the profile at runtime. +// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling. +// It also builds the data structures and initialization code needed for +// updating execution counts and emitting the profile at runtime. // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" - #include "llvm/ADT/Triple.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -49,7 +49,15 @@ public: private: InstrProfOptions Options; Module *M; - DenseMap<GlobalVariable *, GlobalVariable *> RegionCounters; + typedef struct PerFunctionProfileData { + uint32_t NumValueSites[IPVK_Last+1]; + GlobalVariable* RegionCounters; + GlobalVariable* DataVar; + PerFunctionProfileData() : RegionCounters(nullptr), DataVar(nullptr) { + memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last+1)); + } + } PerFunctionProfileData; + DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap; std::vector<Value *> UsedVars; bool isMachO() const { @@ -58,24 +66,30 @@ private: /// Get the section name for the counter variables. StringRef getCountersSection() const { - return isMachO() ? 
"__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts"; + return getInstrProfCountersSectionName(isMachO()); } /// Get the section name for the name variables. StringRef getNameSection() const { - return isMachO() ? "__DATA,__llvm_prf_names" : "__llvm_prf_names"; + return getInstrProfNameSectionName(isMachO()); } /// Get the section name for the profile data variables. StringRef getDataSection() const { - return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data"; + return getInstrProfDataSectionName(isMachO()); } /// Get the section name for the coverage mapping data. StringRef getCoverageSection() const { - return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap"; + return getInstrProfCoverageSectionName(isMachO()); } + /// Count the number of instrumented value sites for the function. + void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); + + /// Replace instrprof_value_profile with a call to runtime library. + void lowerValueProfileInst(InstrProfValueProfileInst *Ins); + /// Replace instrprof_increment with an increment of the appropriate value. void lowerIncrement(InstrProfIncrementInst *Inc); @@ -117,20 +131,37 @@ bool InstrProfiling::runOnModule(Module &M) { bool MadeChange = false; this->M = &M; - RegionCounters.clear(); + ProfileDataMap.clear(); UsedVars.clear(); + // We did not know how many value sites there would be inside + // the instrumented function. This is counting the number of instrumented + // target value sites to enter it as field in the profile data variable. for (Function &F : M) for (BasicBlock &BB : F) for (auto I = BB.begin(), E = BB.end(); I != E;) - if (auto *Inc = dyn_cast<InstrProfIncrementInst>(I++)) { + if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I++)) + computeNumValueSiteCounts(Ind); + + for (Function &F : M) + for (BasicBlock &BB : F) + for (auto I = BB.begin(), E = BB.end(); I != E;) { + auto Instr = I++; + if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) { lowerIncrement(Inc); MadeChange = true; + } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) { + lowerValueProfileInst(Ind); + MadeChange = true; } - if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) { + } + + if (GlobalVariable *Coverage = + M.getNamedGlobal(getCoverageMappingVarName())) { lowerCoverageData(Coverage); MadeChange = true; } + if (!MadeChange) return false; @@ -141,10 +172,59 @@ bool InstrProfiling::runOnModule(Module &M) { return true; } +static Constant *getOrInsertValueProfilingCall(Module &M) { + LLVMContext &Ctx = M.getContext(); + auto *ReturnTy = Type::getVoidTy(M.getContext()); + Type *ParamTypes[] = { +#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType +#include "llvm/ProfileData/InstrProfData.inc" + }; + auto *ValueProfilingCallTy = + FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); + return M.getOrInsertFunction(getInstrProfValueProfFuncName(), + ValueProfilingCallTy); +} + +void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { + + GlobalVariable *Name = Ind->getName(); + uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); + uint64_t Index = Ind->getIndex()->getZExtValue(); + auto It = ProfileDataMap.find(Name); + if (It == ProfileDataMap.end()) { + PerFunctionProfileData PD; + PD.NumValueSites[ValueKind] = Index + 1; + ProfileDataMap[Name] = PD; + } else if (It->second.NumValueSites[ValueKind] <= Index) + It->second.NumValueSites[ValueKind] = Index + 1; +} + +void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) 
{ + + GlobalVariable *Name = Ind->getName(); + auto It = ProfileDataMap.find(Name); + assert(It != ProfileDataMap.end() && It->second.DataVar && + "value profiling detected in function with no counter increment"); + + GlobalVariable *DataVar = It->second.DataVar; + uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); + uint64_t Index = Ind->getIndex()->getZExtValue(); + for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind) + Index += It->second.NumValueSites[Kind]; + + IRBuilder<> Builder(Ind); + Value* Args[3] = {Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index)}; + Ind->replaceAllUsesWith( + Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args)); + Ind->eraseFromParent(); +} + void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { GlobalVariable *Counters = getOrCreateRegionCounters(Inc); - IRBuilder<> Builder(Inc->getParent(), *Inc); + IRBuilder<> Builder(Inc); uint64_t Index = Inc->getIndex()->getZExtValue(); Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); Value *Count = Builder.CreateLoad(Addr, "pgocount"); @@ -172,9 +252,10 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { GlobalVariable *Name = cast<GlobalVariable>(V); // If we have region counters for this name, we've already handled it. - auto It = RegionCounters.find(Name); - if (It != RegionCounters.end()) - continue; + auto It = ProfileDataMap.find(Name); + if (It != ProfileDataMap.end()) + if (It->second.RegionCounters) + continue; // Move the name variable to the right section. Name->setSection(getNameSection()); @@ -183,69 +264,108 @@ } /// Get the name of a profiling variable for a particular function. -static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) { - auto *Arr = cast<ConstantDataArray>(Inc->getName()->getInitializer()); - StringRef Name = Arr->isCString() ? Arr->getAsCString() : Arr->getAsString(); - return ("__llvm_profile_" + VarName + "_" + Name).str(); +static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { + StringRef NamePrefix = getInstrProfNameVarPrefix(); + StringRef Name = Inc->getName()->getName().substr(NamePrefix.size()); + return (Prefix + Name).str(); +} + +static inline bool shouldRecordFunctionAddr(Function *F) { + // Check the linkage + if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && + !F->hasAvailableExternallyLinkage()) + return true; + // Check uses of this function for other than direct calls or invokes to it. + return F->hasAddressTaken(); +} + +static inline Comdat *getOrCreateProfileComdat(Module &M, + InstrProfIncrementInst *Inc) { + // COFF format requires a COMDAT section to have a key symbol with the same + // name. The linker targeting COFF also requires that the COMDAT section + // that a section is associated with must precede the associating section. + // For this reason, we must choose the name var's name as the name of the + // comdat. + StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF() + ? getInstrProfNameVarPrefix() + : getInstrProfComdatPrefix()); + return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix))); } GlobalVariable * InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { - GlobalVariable *Name = Inc->getName(); - auto It = RegionCounters.find(Name); - if (It != RegionCounters.end()) - return It->second; - - // Move the name variable to the right section.
 GlobalVariable *
 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
-  GlobalVariable *Name = Inc->getName();
-  auto It = RegionCounters.find(Name);
-  if (It != RegionCounters.end())
-    return It->second;
-
-  // Move the name variable to the right section. Make sure it is placed in the
-  // same comdat as its associated function. Otherwise, we may get multiple
-  // counters for the same function in certain cases.
+  GlobalVariable *NamePtr = Inc->getName();
+  auto It = ProfileDataMap.find(NamePtr);
+  PerFunctionProfileData PD;
+  if (It != ProfileDataMap.end()) {
+    if (It->second.RegionCounters)
+      return It->second.RegionCounters;
+    PD = It->second;
+  }
+
+  // Move the name variable to the right section. Place it in a COMDAT group
+  // if the associated function is a COMDAT. This ensures that only one copy
+  // of the COMDAT function's counters is emitted after linking.
   Function *Fn = Inc->getParent()->getParent();
-  Name->setSection(getNameSection());
-  Name->setAlignment(1);
-  Name->setComdat(Fn->getComdat());
+  Comdat *ProfileVarsComdat = nullptr;
+  if (Fn->hasComdat())
+    ProfileVarsComdat = getOrCreateProfileComdat(*M, Inc);
+  NamePtr->setSection(getNameSection());
+  NamePtr->setAlignment(1);
+  NamePtr->setComdat(ProfileVarsComdat);
 
   uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
   LLVMContext &Ctx = M->getContext();
   ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
 
   // Create the counters variable.
-  auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(),
-                                      Constant::getNullValue(CounterTy),
-                                      getVarName(Inc, "counters"));
-  Counters->setVisibility(Name->getVisibility());
-  Counters->setSection(getCountersSection());
-  Counters->setAlignment(8);
-  Counters->setComdat(Fn->getComdat());
-
-  RegionCounters[Inc->getName()] = Counters;
+  auto *CounterPtr =
+      new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
+                         Constant::getNullValue(CounterTy),
+                         getVarName(Inc, getInstrProfCountersVarPrefix()));
+  CounterPtr->setVisibility(NamePtr->getVisibility());
+  CounterPtr->setSection(getCountersSection());
+  CounterPtr->setAlignment(8);
+  CounterPtr->setComdat(ProfileVarsComdat);
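// What the increment lowering in lowerIncrement amounts to, written as plain
// C++ over the counter array that getOrCreateRegionCounters creates: one
// 64-bit slot per region, selected by the intrinsic's index operand. (A
// sketch, not the pass itself; the pass emits IR, not C++.)
#include <cstdint>

static uint64_t Counters[8]; // stands in for the per-function counter array

static void loweredIncrement(uint64_t Index) {
  uint64_t Count = Counters[Index]; // CreateLoad(Addr, "pgocount")
  Counters[Index] = Count + 1;      // CreateAdd + CreateStore
}

int main() {
  loweredIncrement(3);
  loweredIncrement(3);
  return Counters[3] == 2 ? 0 : 1;
}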
 
   // Create data variable.
-  auto *NameArrayTy = Name->getType()->getPointerElementType();
-  auto *Int32Ty = Type::getInt32Ty(Ctx);
-  auto *Int64Ty = Type::getInt64Ty(Ctx);
   auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
-  auto *Int64PtrTy = Type::getInt64PtrTy(Ctx);
-
-  Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy};
+  auto *Int16Ty = Type::getInt16Ty(Ctx);
+  auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last+1);
+  Type *DataTypes[] = {
+  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
+  #include "llvm/ProfileData/InstrProfData.inc"
+  };
   auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
+
+  Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) ?
+      ConstantExpr::getBitCast(Fn, Int8PtrTy) :
+      ConstantPointerNull::get(Int8PtrTy);
+
+  Constant *Int16ArrayVals[IPVK_Last+1];
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+    Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
+
   Constant *DataVals[] = {
-      ConstantInt::get(Int32Ty, NameArrayTy->getArrayNumElements()),
-      ConstantInt::get(Int32Ty, NumCounters),
-      ConstantInt::get(Int64Ty, Inc->getHash()->getZExtValue()),
-      ConstantExpr::getBitCast(Name, Int8PtrTy),
-      ConstantExpr::getBitCast(Counters, Int64PtrTy)};
-  auto *Data = new GlobalVariable(*M, DataTy, true, Name->getLinkage(),
+  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
+  #include "llvm/ProfileData/InstrProfData.inc"
+  };
+  auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
                                   ConstantStruct::get(DataTy, DataVals),
-                                  getVarName(Inc, "data"));
-  Data->setVisibility(Name->getVisibility());
+                                  getVarName(Inc, getInstrProfDataVarPrefix()));
+  Data->setVisibility(NamePtr->getVisibility());
   Data->setSection(getDataSection());
-  Data->setAlignment(8);
-  Data->setComdat(Fn->getComdat());
+  Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
+  Data->setComdat(ProfileVarsComdat);
+
+  PD.RegionCounters = CounterPtr;
+  PD.DataVar = Data;
+  ProfileDataMap[NamePtr] = PD;
 
   // Mark the data variable as used so that it isn't stripped out.
   UsedVars.push_back(Data);
 
-  return Counters;
+  return CounterPtr;
 }
 
 void InstrProfiling::emitRegistration() {
@@ -253,20 +373,24 @@ void InstrProfiling::emitRegistration() {
   if (Triple(M->getTargetTriple()).isOSDarwin())
     return;
 
+  // Use linker script magic to get data/cnts/name start/end.
+  if (Triple(M->getTargetTriple()).isOSLinux() ||
+      Triple(M->getTargetTriple()).isOSFreeBSD())
+    return;
+
   // Construct the function.
   auto *VoidTy = Type::getVoidTy(M->getContext());
   auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
   auto *RegisterFTy = FunctionType::get(VoidTy, false);
   auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
-                                     "__llvm_profile_register_functions", M);
+                                     getInstrProfRegFuncsName(), M);
   RegisterF->setUnnamedAddr(true);
-  if (Options.NoRedZone)
-    RegisterF->addFnAttr(Attribute::NoRedZone);
+  if (Options.NoRedZone) RegisterF->addFnAttr(Attribute::NoRedZone);
 
   auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
   auto *RuntimeRegisterF =
       Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
-                       "__llvm_profile_register_function", M);
+                       getInstrProfRegFuncName(), M);
 
   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
   for (Value *Data : UsedVars)
@@ -275,26 +399,27 @@ void InstrProfiling::emitRegistration() {
 }
 
 void InstrProfiling::emitRuntimeHook() {
-  const char *const RuntimeVarName = "__llvm_profile_runtime";
-  const char *const RuntimeUserName = "__llvm_profile_runtime_user";
 
-  // If the module's provided its own runtime, we don't need to do anything.
-  if (M->getGlobalVariable(RuntimeVarName))
+  // We expect the linker to be invoked with the -u<hook_var> flag on Linux,
+  // in which case there is no need to emit the user function.
+  if (Triple(M->getTargetTriple()).isOSLinux())
     return;
 
+  // If the module has provided its own runtime, we don't need to do anything.
+  if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) return;
+
   // Declare an external variable that will pull in the runtime initialization.
auto *Int32Ty = Type::getInt32Ty(M->getContext()); auto *Var = new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, - nullptr, RuntimeVarName); + nullptr, getInstrProfRuntimeHookVarName()); // Make a function that uses it. - auto *User = - Function::Create(FunctionType::get(Int32Ty, false), - GlobalValue::LinkOnceODRLinkage, RuntimeUserName, M); + auto *User = Function::Create(FunctionType::get(Int32Ty, false), + GlobalValue::LinkOnceODRLinkage, + getInstrProfRuntimeHookVarUseFuncName(), M); User->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - User->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) User->addFnAttr(Attribute::NoRedZone); User->setVisibility(GlobalValue::HiddenVisibility); IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User)); @@ -330,26 +455,23 @@ void InstrProfiling::emitUses() { LLVMUsed = new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, ConstantArray::get(ATy, MergedVars), "llvm.used"); - LLVMUsed->setSection("llvm.metadata"); } void InstrProfiling::emitInitialization() { std::string InstrProfileOutput = Options.InstrProfileOutput; - Constant *RegisterF = M->getFunction("__llvm_profile_register_functions"); - if (!RegisterF && InstrProfileOutput.empty()) - return; + Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName()); + if (!RegisterF && InstrProfileOutput.empty()) return; // Create the initialization function. auto *VoidTy = Type::getVoidTy(M->getContext()); - auto *F = - Function::Create(FunctionType::get(VoidTy, false), - GlobalValue::InternalLinkage, "__llvm_profile_init", M); + auto *F = Function::Create(FunctionType::get(VoidTy, false), + GlobalValue::InternalLinkage, + getInstrProfInitFuncName(), M); F->setUnnamedAddr(true); F->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - F->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) F->addFnAttr(Attribute::NoRedZone); // Add the basic block and the necessary calls. IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F)); @@ -358,9 +480,8 @@ void InstrProfiling::emitInitialization() { if (!InstrProfileOutput.empty()) { auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); auto *SetNameTy = FunctionType::get(VoidTy, Int8PtrTy, false); - auto *SetNameF = - Function::Create(SetNameTy, GlobalValue::ExternalLinkage, - "__llvm_profile_override_default_filename", M); + auto *SetNameF = Function::Create(SetNameTy, GlobalValue::ExternalLinkage, + getInstrProfFileOverriderFuncName(), M); // Create variable for profile name. Constant *ProfileNameConst = diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 2750585..a05a5fa 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -12,12 +12,47 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm-c/Initialization.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; +/// Moves I before IP. Returns new insert point. +static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) { + // If I is IP, move the insert point down. + if (I == IP) + return ++IP; + // Otherwise, move I before IP and return IP. 
+  I->moveBefore(&*IP);
+  return IP;
+}
+
+/// Instrumentation passes often insert conditional checks into entry blocks.
+/// Call this function before splitting the entry block to move instructions
+/// that must remain in the entry block up before the split point. Static
+/// allocas and llvm.localescape calls, for example, must remain in the entry
+/// block.
+BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB,
+                                                    BasicBlock::iterator IP) {
+  assert(&BB.getParent()->getEntryBlock() == &BB);
+  for (auto I = IP, E = BB.end(); I != E; ++I) {
+    bool KeepInEntry = false;
+    if (auto *AI = dyn_cast<AllocaInst>(I)) {
+      if (AI->isStaticAlloca())
+        KeepInEntry = true;
+    } else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+      if (II->getIntrinsicID() == llvm::Intrinsic::localescape)
+        KeepInEntry = true;
+    }
+    if (KeepInEntry)
+      IP = moveBeforeInsertPoint(I, IP);
+  }
+  return IP;
+}
+
 /// initializeInstrumentation - Initialize all passes in the Instrumentation
 /// library.
 void llvm::initializeInstrumentation(PassRegistry &Registry) {
@@ -25,6 +60,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
   initializeAddressSanitizerModulePass(Registry);
   initializeBoundsCheckingPass(Registry);
   initializeGCOVProfilerPass(Registry);
+  initializePGOInstrumentationGenPass(Registry);
+  initializePGOInstrumentationUsePass(Registry);
   initializeInstrProfilingPass(Registry);
   initializeMemorySanitizerPass(Registry);
   initializeThreadSanitizerPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 286a563..5a7bce5 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -148,7 +148,7 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
        cl::desc("poison uninitialized stack variables with a call"),
        cl::Hidden, cl::init(false));
 static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
-       cl::desc("poison uninitialized stack variables with the given patter"),
+       cl::desc("poison uninitialized stack variables with the given pattern"),
        cl::Hidden, cl::init(0xff));
 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
        cl::desc("poison undef temps"),
@@ -222,10 +222,17 @@ static const MemoryMapParams Linux_I386_MemoryMapParams = {
 
 // x86_64 Linux
 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
+#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
   0x400000000000,  // AndMask
   0,               // XorMask (not used)
   0,               // ShadowBase (not used)
   0x200000000000,  // OriginBase
+#else
+  0,               // AndMask (not used)
+  0x500000000000,  // XorMask
+  0,               // ShadowBase (not used)
+  0x100000000000,  // OriginBase
+#endif
 };
 
 // mips64 Linux
@@ -244,6 +251,14 @@ static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
   0x1C0000000000,  // OriginBase
 };
 
+// aarch64 Linux
+static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
+  0,               // AndMask (not used)
+  0x06000000000,   // XorMask
+  0,               // ShadowBase (not used)
+  0x01000000000,   // OriginBase
+};
+
 // i386 FreeBSD
 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
   0x000180000000,  // AndMask
@@ -266,15 +281,20 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
 };
 
 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
-  NULL,
+  nullptr,
   &Linux_MIPS64_MemoryMapParams,
 };
 
 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
-  NULL,
+  nullptr,
   &Linux_PowerPC64_MemoryMapParams,
 };
 
+static
const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { + nullptr, + &Linux_AArch64_MemoryMapParams, +}; + static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { &FreeBSD_I386_MemoryMapParams, &FreeBSD_X86_64_MemoryMapParams, @@ -353,8 +373,9 @@ class MemorySanitizer : public FunctionPass { friend struct MemorySanitizerVisitor; friend struct VarArgAMD64Helper; friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; }; -} // namespace +} // anonymous namespace char MemorySanitizer::ID = 0; INITIALIZE_PASS(MemorySanitizer, "msan", @@ -377,7 +398,6 @@ static GlobalVariable *createPrivateNonConstGlobalForString(Module &M, GlobalValue::PrivateLinkage, StrConst, ""); } - /// \brief Insert extern declaration of runtime-provided functions and globals. void MemorySanitizer::initializeCallbacks(Module &M) { // Only do this once. @@ -496,6 +516,10 @@ bool MemorySanitizer::doInitialization(Module &M) { case Triple::ppc64le: MapParams = Linux_PowerPC_MemoryMapParams.bits64; break; + case Triple::aarch64: + case Triple::aarch64_be: + MapParams = Linux_ARM_MemoryMapParams.bits64; + break; default: report_fatal_error("unsupported architecture"); } @@ -697,7 +721,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *Cmp = IRB.CreateICmpNE( ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( - Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights); + Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights); IRBuilder<> IRBNew(CheckTerm); paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), getOriginPtr(Addr, IRBNew, Alignment), StoreSize, @@ -893,16 +917,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// /// Offset = (Addr & ~AndMask) ^ XorMask Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) { + Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy); + uint64_t AndMask = MS.MapParams->AndMask; - assert(AndMask != 0 && "AndMask shall be specified"); - Value *OffsetLong = - IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy), - ConstantInt::get(MS.IntptrTy, ~AndMask)); + if (AndMask) + OffsetLong = + IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask)); uint64_t XorMask = MS.MapParams->XorMask; - if (XorMask != 0) - OffsetLong = IRB.CreateXor(OffsetLong, - ConstantInt::get(MS.IntptrTy, XorMask)); + if (XorMask) + OffsetLong = + IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask)); return OffsetLong; } @@ -1339,6 +1364,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } void visitBitCastInst(BitCastInst &I) { + // Special case: if this is the bitcast (there is exactly 1 allowed) between + // a musttail call and a ret, don't instrument. New instructions are not + // allowed after a musttail call. 
+ if (auto *CI = dyn_cast<CallInst>(I.getOperand(0))) + if (CI->isMustTailCall()) + return; IRBuilder<> IRB(&I); setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I))); setOrigin(&I, getOrigin(&I, 0)); @@ -1570,18 +1601,24 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Type *EltTy = Ty->getSequentialElementType(); SmallVector<Constant *, 16> Elements; for (unsigned Idx = 0; Idx < NumElements; ++Idx) { - ConstantInt *Elt = - dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx)); - APInt V = Elt->getValue(); - APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); - Elements.push_back(ConstantInt::get(EltTy, V2)); + if (ConstantInt *Elt = + dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) { + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + Elements.push_back(ConstantInt::get(EltTy, V2)); + } else { + Elements.push_back(ConstantInt::get(EltTy, 1)); + } } ShadowMul = ConstantVector::get(Elements); } else { - ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg); - APInt V = Elt->getValue(); - APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); - ShadowMul = ConstantInt::get(Elt->getType(), V2); + if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) { + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + ShadowMul = ConstantInt::get(Ty, V2); + } else { + ShadowMul = ConstantInt::get(Ty, 1); + } } IRBuilder<> IRB(&I); @@ -1730,25 +1767,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Instrument signed relational comparisons. /// - /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by - /// propagating the highest bit of the shadow. Everything else is delegated - /// to handleShadowOr(). + /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest + /// bit of the shadow. Everything else is delegated to handleShadowOr(). 
void handleSignedRelationalComparison(ICmpInst &I) { - Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0)); - Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1)); - Value* op = nullptr; - CmpInst::Predicate pre = I.getPredicate(); - if (constOp0 && constOp0->isNullValue() && - (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) { - op = I.getOperand(1); - } else if (constOp1 && constOp1->isNullValue() && - (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) { + Constant *constOp; + Value *op = nullptr; + CmpInst::Predicate pre; + if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) { op = I.getOperand(0); + pre = I.getPredicate(); + } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) { + op = I.getOperand(1); + pre = I.getSwappedPredicate(); + } else { + handleShadowOr(I); + return; } - if (op) { + + if ((constOp->isNullValue() && + (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) || + (constOp->isAllOnesValue() && + (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) { IRBuilder<> IRB(&I); - Value* Shadow = - IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt"); + Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), + "_msprop_icmp_s"); setShadow(&I, Shadow); setOrigin(&I, getOrigin(op)); } else { @@ -1860,25 +1902,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { VAHelper->visitVACopyInst(I); } - enum IntrinsicKind { - IK_DoesNotAccessMemory, - IK_OnlyReadsMemory, - IK_WritesMemory - }; - - static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) { - const int DoesNotAccessMemory = IK_DoesNotAccessMemory; - const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory; - const int OnlyReadsMemory = IK_OnlyReadsMemory; - const int OnlyAccessesArgumentPointees = IK_WritesMemory; - const int UnknownModRefBehavior = IK_WritesMemory; -#define GET_INTRINSIC_MODREF_BEHAVIOR -#define ModRefBehavior IntrinsicKind -#include "llvm/IR/Intrinsics.gen" -#undef ModRefBehavior -#undef GET_INTRINSIC_MODREF_BEHAVIOR - } - /// \brief Handle vector store-like intrinsics. /// /// Instrument intrinsics that look like a simple SIMD store: writes memory, @@ -1978,17 +2001,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (NumArgOperands == 0) return false; - Intrinsic::ID iid = I.getIntrinsicID(); - IntrinsicKind IK = getIntrinsicKind(iid); - bool OnlyReadsMemory = IK == IK_OnlyReadsMemory; - bool WritesMemory = IK == IK_WritesMemory; - assert(!(OnlyReadsMemory && WritesMemory)); - if (NumArgOperands == 2 && I.getArgOperand(0)->getType()->isPointerTy() && I.getArgOperand(1)->getType()->isVectorTy() && I.getType()->isVoidTy() && - WritesMemory) { + !I.onlyReadsMemory()) { // This looks like a vector store. return handleVectorStoreIntrinsic(I); } @@ -1996,12 +2013,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (NumArgOperands == 1 && I.getArgOperand(0)->getType()->isPointerTy() && I.getType()->isVectorTy() && - OnlyReadsMemory) { + I.onlyReadsMemory()) { // This looks like a vector load. return handleVectorLoadIntrinsic(I); } - if (!OnlyReadsMemory && !WritesMemory) + if (I.doesNotAccessMemory()) if (maybeHandleSimpleNomemIntrinsic(I)) return true; @@ -2493,13 +2510,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Now, get the shadow for the RetVal. if (!I.getType()->isSized()) return; + // Don't emit the epilogue for musttail call returns. 
+ if (CS.isCall() && cast<CallInst>(&I)->isMustTailCall()) return; IRBuilder<> IRBBefore(&I); // Until we have full dynamic coverage, make sure the retval shadow is 0. Value *Base = getShadowPtrForRetval(&I, IRBBefore); IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment); - Instruction *NextInsn = nullptr; + BasicBlock::iterator NextInsn; if (CS.isCall()) { - NextInsn = I.getNextNode(); + NextInsn = ++I.getIterator(); + assert(NextInsn != I.getParent()->end()); } else { BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest(); if (!NormalDest->getSinglePredecessor()) { @@ -2511,10 +2531,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return; } NextInsn = NormalDest->getFirstInsertionPt(); - assert(NextInsn && + assert(NextInsn != NormalDest->end() && "Could not find insertion point for retval shadow load"); } - IRBuilder<> IRBAfter(NextInsn); + IRBuilder<> IRBAfter(&*NextInsn); Value *RetvalShadow = IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter), kShadowTLSAlignment, "_msret"); @@ -2523,10 +2543,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter))); } + bool isAMustTailRetVal(Value *RetVal) { + if (auto *I = dyn_cast<BitCastInst>(RetVal)) { + RetVal = I->getOperand(0); + } + if (auto *I = dyn_cast<CallInst>(RetVal)) { + return I->isMustTailCall(); + } + return false; + } + void visitReturnInst(ReturnInst &I) { IRBuilder<> IRB(&I); Value *RetVal = I.getReturnValue(); if (!RetVal) return; + // Don't emit the epilogue for musttail call returns. + if (isAMustTailRetVal(RetVal)) return; Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); if (CheckReturnValue) { insertShadowCheck(RetVal, &I); @@ -2653,6 +2685,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOrigin(&I, getCleanOrigin()); } + void visitCatchSwitchInst(CatchSwitchInst &I) { + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + + void visitFuncletPadInst(FuncletPadInst &I) { + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); } @@ -2696,6 +2738,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Nothing to do here. } + void visitCleanupReturnInst(CleanupReturnInst &CRI) { + DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n"); + // Nothing to do here. + } + + void visitCatchReturnInst(CatchReturnInst &CRI) { + DEBUG(dbgs() << "CatchReturn: " << CRI << "\n"); + // Nothing to do here. + } + void visitInstruction(Instruction &I) { // Everything else: stop propagating and check for poisoned shadow. if (ClDumpStrictInstructions) @@ -2808,6 +2860,8 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVAStartInst(VAStartInst &I) override { + if (F.getCallingConv() == CallingConv::X86_64_Win64) + return; IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); @@ -2820,6 +2874,8 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVACopyInst(VACopyInst &I) override { + if (F.getCallingConv() == CallingConv::X86_64_Win64) + return; IRBuilder<> IRB(&I); Value *VAListTag = I.getArgOperand(0); Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); @@ -2979,6 +3035,242 @@ struct VarArgMIPS64Helper : public VarArgHelper { } }; + +/// \brief AArch64-specific implementation of VarArgHelper. 
+struct VarArgAArch64Helper : public VarArgHelper {
+  static const unsigned kAArch64GrArgSize = 56;
+  static const unsigned kAArch64VrArgSize = 128;
+
+  static const unsigned AArch64GrBegOffset = 0;
+  static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
+  // Make VR space aligned to 16 bytes.
+  static const unsigned AArch64VrBegOffset = AArch64GrEndOffset + 8;
+  static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
+                                             + kAArch64VrArgSize;
+  static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
+
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy;
+  Value *VAArgOverflowSize;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
+                      MemorySanitizerVisitor &MSV)
+    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+      VAArgOverflowSize(nullptr) {}
+
+  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+  ArgKind classifyArgument(Value* arg) {
+    Type *T = arg->getType();
+    if (T->isFPOrFPVectorTy())
+      return AK_FloatingPoint;
+    if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+        || (T->isPointerTy()))
+      return AK_GeneralPurpose;
+    return AK_Memory;
+  }
+
+  // The instrumentation stores the argument shadow in a non-ABI-specific
+  // format because it does not know which arguments are named (since Clang,
+  // as in the x86_64 case, lowers the va_args in the frontend and this pass
+  // only sees the low-level code that deals with va_list internals).
+  // The first seven GR registers are saved in the first 56 bytes of the
+  // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
+  // the remaining arguments.
+  // Using a constant offset within the va_arg TLS array allows a fast copy
+  // in the finalize instrumentation.
+  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+    unsigned GrOffset = AArch64GrBegOffset;
+    unsigned VrOffset = AArch64VrBegOffset;
+    unsigned OverflowOffset = AArch64VAEndOffset;
+
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      ArgKind AK = classifyArgument(A);
+      if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
+        AK = AK_Memory;
+      if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
+        AK = AK_Memory;
+      Value *Base;
+      switch (AK) {
+        case AK_GeneralPurpose:
+          Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
+          GrOffset += 8;
+          break;
+        case AK_FloatingPoint:
+          Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
+          VrOffset += 16;
+          break;
+        case AK_Memory:
+          uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+          Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+          OverflowOffset += RoundUpToAlignment(ArgSize, 8);
+          break;
+      }
+      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+    }
+    Constant *OverflowSize =
+      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
+    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+  }
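// A standalone check of the fixed shadow layout the constants above describe:
// 56 bytes of GR shadow, 8 bytes of padding so the VR area is 16-byte
// aligned, 128 bytes of VR shadow, then the overflow area. (A sketch, not
// LLVM code.)
#include <cassert>

int main() {
  const unsigned GrBeg = 0;
  const unsigned GrEnd = GrBeg + 56;  // kAArch64GrArgSize
  const unsigned VrBeg = GrEnd + 8;   // padding aligns the VR area to 16
  const unsigned VrEnd = VrBeg + 128; // kAArch64VrArgSize
  const unsigned VAEnd = VrEnd;       // overflow (stack) args start here
  assert(VrBeg % 16 == 0 && VAEnd == 192);
}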
+  /// Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants (size of va_list).
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */32, /* alignment */8, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants (size of va_list).
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */32, /* alignment */8, false);
+  }
+
+  // Retrieve a va_list field of 'void*' size.
+  Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+    Value *SaveAreaPtrPtr =
+      IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                      ConstantInt::get(MS.IntptrTy, offset)),
+        Type::getInt64PtrTy(*MS.C));
+    return IRB.CreateLoad(SaveAreaPtrPtr);
+  }
+
+  // Retrieve a va_list field of 'int' size.
+  Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+    Value *SaveAreaPtr =
+      IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                      ConstantInt::get(MS.IntptrTy, offset)),
+        Type::getInt32PtrTy(*MS.C));
+    Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
+    return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
+  }
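// The AArch64 va_list offsets that getVAField64/getVAField32 read above
// (0, 8, 16, 24, 28), spelled out as a struct. This assumes the usual LP64
// AAPCS64 layout; it is a cross-check, not LLVM code.
#include <cstddef>
#include <cstdint>

struct AArch64VAList {
  void *stack;     // offset 0:  next stack argument
  void *gr_top;    // offset 8:  end of the GR register save area
  void *vr_top;    // offset 16: end of the VR register save area
  int32_t gr_offs; // offset 24: negative offset from gr_top
  int32_t vr_offs; // offset 28: negative offset from vr_top
};

static_assert(offsetof(AArch64VAList, gr_offs) == 24, "layout assumption");
static_assert(offsetof(AArch64VAList, vr_offs) == 28, "layout assumption");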
+  void finalizeInstrumentation() override {
+    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+      VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+      Value *CopySize =
+        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
+                      VAArgOverflowSize);
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+    }
+
+    Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
+    Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
+
+    // Instrument va_start, copy va_list shadow from the backup copy of
+    // the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+
+      Value *VAListTag = OrigInst->getArgOperand(0);
+
+      // The variadic ABI for AArch64 creates two areas to save the incoming
+      // argument registers (one for the 64-bit general registers xn-x7 and
+      // another for the 128-bit FP/SIMD registers vn-v7).
+      // We then need to propagate the shadow arguments over both regions,
+      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
+      // The remaining arguments are saved in the shadow for 'va::stack'.
+      // One caveat is that only the non-named (variadic) arguments need to
+      // be propagated; at the call site, however, the instrumentation saves
+      // 'all' of the arguments. So to copy the shadow values from the va_arg
+      // TLS array we need to adjust the offsets for both the GR and VR
+      // fields based on the __{gr,vr}_offs value (since they are stored
+      // based on the incoming named arguments).
+
+      // Read the stack pointer from the va_list.
+      Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
+
+      // Read both the __gr_top and __gr_off and add them up.
+      Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
+      Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
+
+      Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
+
+      // Read both the __vr_top and __vr_off and add them up.
+      Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
+      Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
+
+      Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
+
+      // The instrumentation does not know how many named arguments are being
+      // used; at the call site, all the arguments' shadows were saved. Since
+      // __gr_off is defined as '0 - ((8 - named_gr) * 8)', the idea is to
+      // propagate just the variadic arguments by ignoring the bytes of
+      // shadow that come from named arguments.
+      Value *GrRegSaveAreaShadowPtrOff =
+        IRB.CreateAdd(GrArgSize, GrOffSaveArea);
+
+      Value *GrRegSaveAreaShadowPtr =
+        MSV.getShadowPtr(GrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                                              GrRegSaveAreaShadowPtrOff);
+      Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
+
+      IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, GrSrcPtr, GrCopySize, 8);
+
+      // Again, but for FP/SIMD values.
+      Value *VrRegSaveAreaShadowPtrOff =
+        IRB.CreateAdd(VrArgSize, VrOffSaveArea);
+
+      Value *VrRegSaveAreaShadowPtr =
+        MSV.getShadowPtr(VrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *VrSrcPtr = IRB.CreateInBoundsGEP(
+        IRB.getInt8Ty(),
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                              IRB.getInt32(AArch64VrBegOffset)),
+        VrRegSaveAreaShadowPtrOff);
+      Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
+
+      IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, VrSrcPtr, VrCopySize, 8);
+
+      // And finally for the remaining arguments.
+      Value *StackSaveAreaShadowPtr =
+        MSV.getShadowPtr(StackSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *StackSrcPtr =
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                              IRB.getInt32(AArch64VAEndOffset));
+
+      IRB.CreateMemCpy(StackSaveAreaShadowPtr, StackSrcPtr,
+                       VAArgOverflowSize, 16);
+    }
+  }
+};
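// The named-argument skip arithmetic the comments above rely on, checked in
// isolation: after va_start, __gr_offs is 0 - ((8 - named_gr) * 8), so adding
// it to the 56-byte GR shadow size yields how many bytes of named-argument
// shadow to skip in the TLS copy (whose first GR slot holds the second
// argument, since the call-site loop starts at arg_begin() + 1). A sketch
// under those assumptions, not part of the pass.
#include <cassert>

int main() {
  const int GrArgSize = 56; // kAArch64GrArgSize above
  for (int named_gr = 1; named_gr <= 8; ++named_gr) {
    int gr_offs = 0 - ((8 - named_gr) * 8);
    int skip = GrArgSize + gr_offs; // GrRegSaveAreaShadowPtrOff
    assert(skip == (named_gr - 1) * 8);
  }
}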
+/// \brief A no-op implementation of VarArgHelper.
 struct VarArgNoOpHelper : public VarArgHelper {
   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -3003,11 +3295,13 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
   else if (TargetTriple.getArch() == llvm::Triple::mips64 ||
            TargetTriple.getArch() == llvm::Triple::mips64el)
     return new VarArgMIPS64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == llvm::Triple::aarch64)
+    return new VarArgAArch64Helper(Func, Msan, Visitor);
   else
     return new VarArgNoOpHelper(Func, Msan, Visitor);
 }
 
-} // namespace
+} // anonymous namespace
 
 bool MemorySanitizer::runOnFunction(Function &F) {
   if (&F == MsanCtorFunction)
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
new file mode 100644
index 0000000..4b59b93
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -0,0 +1,718 @@
+//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements PGO instrumentation using a minimum spanning tree based
+// on the following paper:
+//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
+//   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
+//   Issue 3, pp 313-322
+// The idea of the algorithm is based on the fact that for each node (except
+// for the entry and exit), the sum of incoming edge counts equals the sum of
+// outgoing edge counts. The count of an edge on the spanning tree can be
+// derived from the counts of the edges not on the spanning tree. Knuth proves
+// this method instruments the minimum number of edges.
+//
+// The minimal spanning tree here is actually a maximum weight tree -- on-tree
+// edges have higher frequencies (more likely to execute). The idea is to
+// instrument those less frequently executed edges to reduce the runtime
+// overhead of instrumented binaries.
+//
+// This file contains two passes:
+// (1) Pass PGOInstrumentationGen which instruments the IR to generate the
+// edge count profile, and
+// (2) Pass PGOInstrumentationUse which reads the edge count profile and
+// annotates the branch weights.
+// To get precise counter information, these two passes need to be invoked at
+// the same compilation point (so they see the same IR). For pass
+// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
+// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
+// the profile is opened at the module level and passed to each PGOUseFunc
+// instance.
+// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
+// in class FuncPGOInstrumentation.
+//
+// Class PGOEdge represents a CFG edge and some auxiliary information. Class
+// BBInfo contains auxiliary information for each BB. These two classes are
+// used in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are
+// derived from PGOEdge and BBInfo, respectively. They contain extra data
+// structures used in populating profile counters.
+// The MST implementation is in Class CFGMST (CFGMST.h).
+//
+//===----------------------------------------------------------------------===//
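// A tiny worked example of the Knuth observation described above, detached
// from LLVM: in a diamond CFG (entry -> {then, else} -> exit), putting the
// two entry edges on the instrumented (off-tree) side is enough, because
// every other count follows from "sum of in-edge counts == sum of out-edge
// counts". (Numbers are illustrative.)
#include <cassert>
#include <cstdint>

int main() {
  uint64_t EntryToThen = 90, EntryToElse = 10; // measured by counters
  uint64_t ThenToExit = EntryToThen;           // 'then' has one in, one out
  uint64_t ElseToExit = EntryToElse;           // likewise for 'else'
  uint64_t ExitCount = ThenToExit + ElseToExit;
  assert(ExitCount == 100);                    // derived, never measured
}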
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "CFGMST.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JamCRC.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-instrumentation"
+
+STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
+STATISTIC(NumOfPGOEdge, "Number of edges.");
+STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
+STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
+STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
+STATISTIC(NumOfPGOMismatch, "Number of functions having mismatched profiles.");
+STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+
+// Command line option to specify the file to read profile from. This is
+// mainly used for testing.
+static cl::opt<std::string>
+    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
+                       cl::value_desc("filename"),
+                       cl::desc("Specify the path of profile data file. This "
+                                "is mainly for test purposes."));
+
+namespace {
+class PGOInstrumentationGen : public ModulePass {
+public:
+  static char ID;
+
+  PGOInstrumentationGen() : ModulePass(ID) {
+    initializePGOInstrumentationGenPass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "PGOInstrumentationGenPass";
+  }
+
+private:
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+  }
+};
+
+class PGOInstrumentationUse : public ModulePass {
+public:
+  static char ID;
+
+  // Provide the profile filename as the parameter.
+  PGOInstrumentationUse(std::string Filename = "")
+      : ModulePass(ID), ProfileFileName(Filename) {
+    if (!PGOTestProfileFile.empty())
+      ProfileFileName = PGOTestProfileFile;
+    initializePGOInstrumentationUsePass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "PGOInstrumentationUsePass";
+  }
+
+private:
+  std::string ProfileFileName;
+  std::unique_ptr<IndexedInstrProfReader> PGOReader;
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+  }
+};
+} // end anonymous namespace
+
+char PGOInstrumentationGen::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationGen, "pgo-instr-gen",
+                      "PGO instrumentation.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationGen, "pgo-instr-gen",
+                    "PGO instrumentation.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationGenPass() {
+  return new PGOInstrumentationGen();
+}
+
+char PGOInstrumentationUse::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationUse, "pgo-instr-use",
+                      "Read PGO instrumentation profile.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationUse, "pgo-instr-use",
+                    "Read PGO instrumentation profile.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationUsePass(StringRef Filename) {
+  return new PGOInstrumentationUse(Filename.str());
+}
+
+namespace {
+/// \brief An MST-based instrumentation for PGO.
+///
+/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
+/// at the function level.
+struct PGOEdge {
+  // This class implements the CFG edges. Note the CFG can be a multi-graph.
+  // So there might be multiple edges with the same SrcBB and DestBB.
+  const BasicBlock *SrcBB;
+  const BasicBlock *DestBB;
+  uint64_t Weight;
+  bool InMST;
+  bool Removed;
+  bool IsCritical;
+  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+      : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false),
+        IsCritical(false) {}
+  // Return the information string of an edge.
+  const std::string infoString() const {
+    return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
+            (IsCritical ? "c" : " ") + "  W=" + Twine(Weight)).str();
+  }
+};
+
+// This class stores the auxiliary information for each BB.
+struct BBInfo {
+  BBInfo *Group;
+  uint32_t Index;
+  uint32_t Rank;
+
+  BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {}
+
+  // Return the information string of this object.
+  const std::string infoString() const {
+    return (Twine("Index=") + Twine(Index)).str();
+  }
+};
+
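// BBInfo's Group/Rank fields above are union-find bookkeeping for the MST
// construction (a Kruskal-style builder needs to test whether an edge joins
// two components). A detached sketch of that use, under the assumption that
// CFGMST performs the classic find/unite operations on these fields.
#include <cassert>
#include <utility>

struct Node {
  Node *Group = this; // representative pointer, as in BBInfo::Group
  unsigned Rank = 0;  // union-by-rank, as in BBInfo::Rank
};

static Node *find(Node *N) {
  while (N->Group != N)
    N = N->Group; // follow representatives to the root
  return N;
}

static bool unite(Node *A, Node *B) {
  Node *RA = find(A), *RB = find(B);
  if (RA == RB)
    return false; // same component: the edge would close a cycle
  if (RA->Rank < RB->Rank)
    std::swap(RA, RB);
  RB->Group = RA;
  if (RA->Rank == RB->Rank)
    RA->Rank++;
  return true;
}

int main() {
  Node A, B, C;
  assert(unite(&A, &B) && unite(&B, &C) && !unite(&A, &C));
}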
+// FuncPGOInstrumentation holds the code and data shared between the
+// instrumentation (gen) and the profile-use passes for one function.
+template <class Edge, class BBInfo> class FuncPGOInstrumentation {
+private:
+  Function &F;
+  void computeCFGHash();
+
+public:
+  std::string FuncName;
+  GlobalVariable *FuncNameVar;
+  // CFG hash value for this function.
+  uint64_t FunctionHash;
+
+  // The Minimum Spanning Tree of function CFG.
+  CFGMST<Edge, BBInfo> MST;
+
+  // Given an edge, find the BB that will be instrumented.
+  // Return nullptr if there is no BB to be instrumented.
+  BasicBlock *getInstrBB(Edge *E);
+
+  // Return the auxiliary BB information.
+  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
+
+  // Dump edges and BB information.
+  void dumpInfo(std::string Str = "") const {
+    MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
+                              Twine(FunctionHash) + "\t" + Str);
+  }
+
+  FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false,
+                         BranchProbabilityInfo *BPI = nullptr,
+                         BlockFrequencyInfo *BFI = nullptr)
+      : F(Func), FunctionHash(0), MST(F, BPI, BFI) {
+    FuncName = getPGOFuncName(F);
+    computeCFGHash();
+    DEBUG(dumpInfo("after CFGMST"));
+
+    NumOfPGOBB += MST.BBInfos.size();
+    for (auto &E : MST.AllEdges) {
+      if (E->Removed)
+        continue;
+      NumOfPGOEdge++;
+      if (!E->InMST)
+        NumOfPGOInstrument++;
+    }
+
+    if (CreateGlobalVar)
+      FuncNameVar = createPGOFuncNameVar(F, FuncName);
+  }
+};
+
+// Compute the hash value for the CFG: the lower 32 bits are the CRC32 of the
+// index value of each BB in the CFG. The higher 32 bits record the number of
+// edges.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
+  std::vector<char> Indexes;
+  JamCRC JC;
+  for (auto &BB : F) {
+    const TerminatorInst *TI = BB.getTerminator();
+    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+      BasicBlock *Succ = TI->getSuccessor(I);
+      uint32_t Index = getBBInfo(Succ).Index;
+      for (int J = 0; J < 4; J++)
+        Indexes.push_back((char)(Index >> (J * 8)));
+    }
+  }
+  JC.update(Indexes);
+  FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+}
+
+// Given a CFG edge E to be instrumented, find in which BB to place the
+// instrumentation code. The function will split the critical edge if
+// necessary.
+template <class Edge, class BBInfo>
+BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
+  if (E->InMST || E->Removed)
+    return nullptr;
+
+  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+  // For a fake edge, instrument the real BB.
+  if (SrcBB == nullptr)
+    return DestBB;
+  if (DestBB == nullptr)
+    return SrcBB;
+
+  // Instrument the SrcBB if it has a single successor,
+  // otherwise, the DestBB if this is not a critical edge.
+  TerminatorInst *TI = SrcBB->getTerminator();
+  if (TI->getNumSuccessors() <= 1)
+    return SrcBB;
+  if (!E->IsCritical)
+    return DestBB;
+
+  // For a critical edge, we have to split. Instrument the newly
+  // created BB.
+  NumOfPGOSplit++;
+  DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> "
+               << getBBInfo(DestBB).Index << "\n");
+  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+  assert(InstrBB && "Critical edge is not split");
+
+  E->Removed = true;
+  return InstrBB;
+}
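// The hash packing computeCFGHash above uses, restated standalone: successor
// indexes are serialized little-endian into a byte stream that is CRC'd, and
// the edge count occupies the upper 32 bits. (JamCRC itself is elided; this
// only checks the byte serialization and the final packing.)
#include <cassert>
#include <cstdint>
#include <vector>

static void appendIndexLE(std::vector<char> &Bytes, uint32_t Index) {
  for (int J = 0; J < 4; J++)
    Bytes.push_back(static_cast<char>(Index >> (J * 8)));
}

static uint64_t packHash(uint64_t NumEdges, uint32_t CRC) {
  return NumEdges << 32 | CRC;
}

int main() {
  std::vector<char> Bytes;
  appendIndexLE(Bytes, 0x01020304);
  assert(Bytes.size() == 4 && Bytes[0] == 0x04 && Bytes[3] == 0x01);
  assert(packHash(7, 0xdeadbeef) == ((7ULL << 32) | 0xdeadbeefULL));
}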
+// Visit all edges and instrument those that are not in the MST.
+// Critical edges will be split.
+static void instrumentOneFunc(Function &F, Module *M,
+                              BranchProbabilityInfo *BPI,
+                              BlockFrequencyInfo *BFI) {
+  unsigned NumCounters = 0;
+  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, true, BPI, BFI);
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    if (!E->InMST && !E->Removed)
+      NumCounters++;
+  }
+
+  uint32_t I = 0;
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
+    if (!InstrBB)
+      continue;
+
+    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+    assert(Builder.GetInsertPoint() != InstrBB->end() &&
+           "Cannot get the instrumentation point");
+    Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+    Builder.CreateCall(
+        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+        {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+         Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
+         Builder.getInt32(I++)});
+  }
+}
+
+// This class represents a CFG edge in profile-use compilation.
+struct PGOUseEdge : public PGOEdge {
+  bool CountValid;
+  uint64_t CountValue;
+  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+      : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {}
+
+  // Set the edge count value.
+  void setEdgeCount(uint64_t Value) {
+    CountValue = Value;
+    CountValid = true;
+  }
+
+  // Return the information string for this object.
+  const std::string infoString() const {
+    if (!CountValid)
+      return PGOEdge::infoString();
+    return (Twine(PGOEdge::infoString()) + "  Count=" + Twine(CountValue)).str();
+  }
+};
+
+typedef SmallVector<PGOUseEdge *, 2> DirectEdges;
+
+// This class stores the auxiliary information for each BB.
+struct UseBBInfo : public BBInfo {
+  uint64_t CountValue;
+  bool CountValid;
+  int32_t UnknownCountInEdge;
+  int32_t UnknownCountOutEdge;
+  DirectEdges InEdges;
+  DirectEdges OutEdges;
+  UseBBInfo(unsigned IX)
+      : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0),
+        UnknownCountOutEdge(0) {}
+  UseBBInfo(unsigned IX, uint64_t C)
+      : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0),
+        UnknownCountOutEdge(0) {}
+
+  // Set the profile count value for this BB.
+  void setBBInfoCount(uint64_t Value) {
+    CountValue = Value;
+    CountValid = true;
+  }
+
+  // Return the information string of this object.
+  const std::string infoString() const {
+    if (!CountValid)
+      return BBInfo::infoString();
+    return (Twine(BBInfo::infoString()) + "  Count=" + Twine(CountValue)).str();
+  }
+};
+
+// Sum up the count values for all the edges.
+static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
+  uint64_t Total = 0;
+  for (auto &E : Edges) {
+    if (E->Removed)
+      continue;
+    Total += E->CountValue;
+  }
+  return Total;
+}
+
+class PGOUseFunc {
+private:
+  Function &F;
+  Module *M;
+  // This member stores the information shared with the instrumentation
+  // (gen) pass.
+  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
+
+  // Return the auxiliary BB information.
+  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+    return FuncInfo.getBBInfo(BB);
+  }
+
+  // The maximum count value in the profile. This is only used in PGO-use
+  // compilation.
+  uint64_t ProgramMaxCount;
+
+  // Find the instrumented BB and set the value.
+  void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+
+  // Set the edge counter value for the unknown edge -- there should be only
+  // one unknown edge.
+  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
+
+  // Return the FuncName string.
+  const std::string getFuncName() const { return FuncInfo.FuncName; }
+
+  // Set the hot/cold inline hints based on the count values.
+  // FIXME: This function should be removed once the functionality in
+  // the inliner is implemented.
+  void applyFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
+    if (ProgramMaxCount == 0)
+      return;
+    // Threshold of the hot functions.
+    const BranchProbability HotFunctionThreshold(1, 100);
+    // Threshold of the cold functions.
+    const BranchProbability ColdFunctionThreshold(2, 10000);
+    if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
+      F.addFnAttr(llvm::Attribute::InlineHint);
+    else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
+      F.addFnAttr(llvm::Attribute::Cold);
+  }
+
+public:
+  PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr,
+             BlockFrequencyInfo *BFI = nullptr)
+      : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI) {}
+
+  // Read counts for the instrumented BBs from the profile.
+  bool readCounters(IndexedInstrProfReader *PGOReader);
+
+  // Populate the counts for all BBs.
+  void populateCounters();
+
+  // Set the branch weights based on the count values.
+  void setBranchWeights();
+};
+
+// Visit all the edges and assign the count values for the instrumented
+// edges and the BBs.
+void PGOUseFunc::setInstrumentedCounts(
+    const std::vector<uint64_t> &CountFromProfile) {
+
+  // Use a worklist as we will update the vector during the iteration.
+  std::vector<PGOUseEdge *> WorkList;
+  for (auto &E : FuncInfo.MST.AllEdges)
+    WorkList.push_back(E.get());
+
+  uint32_t I = 0;
+  for (auto &E : WorkList) {
+    BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
+    if (!InstrBB)
+      continue;
+    uint64_t CountValue = CountFromProfile[I++];
+    if (!E->Removed) {
+      getBBInfo(InstrBB).setBBInfoCount(CountValue);
+      E->setEdgeCount(CountValue);
+      continue;
+    }
+
+    // Need to add two new edges.
+    BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+    BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+    // Add a new edge SrcBB->InstrBB.
+    PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
+    NewEdge.setEdgeCount(CountValue);
+    // Add a new edge InstrBB->DestBB.
+    PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
+    NewEdge1.setEdgeCount(CountValue);
+    NewEdge1.InMST = true;
+    getBBInfo(InstrBB).setBBInfoCount(CountValue);
+  }
+}
+
+// Set the count value for the unknown edge. There should be one and only one
+// unknown edge in the Edges vector.
+void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
+  for (auto &E : Edges) {
+    if (E->CountValid)
+      continue;
+    E->setEdgeCount(Value);
+
+    getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+    getBBInfo(E->DestBB).UnknownCountInEdge--;
+    return;
+  }
+  llvm_unreachable("Cannot find the unknown count edge");
+}
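// The hot/cold classification in applyFunctionAttributes above, reduced to
// integer arithmetic: "hot" means the entry count reaches 1/100 of the
// program-wide maximum, "cold" means the function's maximum stays under
// 2/10000 of it. (BranchProbability::scale rounds slightly differently;
// this only sketches the thresholds.)
#include <cassert>
#include <cstdint>

enum class Hint { None, InlineHint, Cold };

static Hint classify(uint64_t Entry, uint64_t Max, uint64_t ProgramMax) {
  if (ProgramMax == 0)
    return Hint::None;
  if (Entry >= ProgramMax / 100)
    return Hint::InlineHint; // hot: entry >= 1% of program max
  if (Max <= ProgramMax * 2 / 10000)
    return Hint::Cold; // cold: max <= 0.02% of program max
  return Hint::None;
}

int main() {
  assert(classify(5000, 9000, 100000) == Hint::InlineHint);
  assert(classify(1, 10, 100000) == Hint::Cold);
  assert(classify(50, 900, 100000) == Hint::None);
}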
+// Read the profile from ProfileFileName and assign the values to the
+// instrumented BBs and edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
+  auto &Ctx = M->getContext();
+  ErrorOr<InstrProfRecord> Result =
+      PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
+  if (std::error_code EC = Result.getError()) {
+    if (EC == instrprof_error::unknown_function)
+      NumOfPGOMissing++;
+    else if (EC == instrprof_error::hash_mismatch ||
+             EC == llvm::instrprof_error::malformed)
+      NumOfPGOMismatch++;
+
+    std::string Msg = EC.message() + std::string(" ") + F.getName().str();
+    Ctx.diagnose(
+        DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+    return false;
+  }
+  std::vector<uint64_t> &CountFromProfile = Result.get().Counts;
+
+  NumOfPGOFunc++;
+  DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+  uint64_t ValueSum = 0;
+  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
+    DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
+    ValueSum += CountFromProfile[I];
+  }
+
+  DEBUG(dbgs() << "SUM =  " << ValueSum << "\n");
+
+  getBBInfo(nullptr).UnknownCountOutEdge = 2;
+  getBBInfo(nullptr).UnknownCountInEdge = 2;
+
+  setInstrumentedCounts(CountFromProfile);
+  ProgramMaxCount = PGOReader->getMaximumFunctionCount();
+  return true;
+}
+
+// Populate the counters from the instrumented BBs to all BBs.
+// At the end of this operation, all BBs should have a valid count value.
+void PGOUseFunc::populateCounters() {
+  // First, set up the Count variable for all BBs.
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    if (E->Removed)
+      continue;
+
+    const BasicBlock *SrcBB = E->SrcBB;
+    const BasicBlock *DestBB = E->DestBB;
+    UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+    UseBBInfo &DestInfo = getBBInfo(DestBB);
+    SrcInfo.OutEdges.push_back(E.get());
+    DestInfo.InEdges.push_back(E.get());
+    SrcInfo.UnknownCountOutEdge++;
+    DestInfo.UnknownCountInEdge++;
+
+    if (!E->CountValid)
+      continue;
+    DestInfo.UnknownCountInEdge--;
+    SrcInfo.UnknownCountOutEdge--;
+  }
+
+  bool Changes = true;
+  unsigned NumPasses = 0;
+  while (Changes) {
+    NumPasses++;
+    Changes = false;
+
+    // For efficient traversal, it's better to start from the end as most
+    // of the instrumented edges are at the end.
+    for (auto &BB : reverse(F)) {
+      UseBBInfo &Count = getBBInfo(&BB);
+      if (!Count.CountValid) {
+        if (Count.UnknownCountOutEdge == 0) {
+          Count.CountValue = sumEdgeCount(Count.OutEdges);
+          Count.CountValid = true;
+          Changes = true;
+        } else if (Count.UnknownCountInEdge == 0) {
+          Count.CountValue = sumEdgeCount(Count.InEdges);
+          Count.CountValid = true;
+          Changes = true;
+        }
+      }
+      if (Count.CountValid) {
+        if (Count.UnknownCountOutEdge == 1) {
+          uint64_t Total = Count.CountValue - sumEdgeCount(Count.OutEdges);
+          setEdgeCount(Count.OutEdges, Total);
+          Changes = true;
+        }
+        if (Count.UnknownCountInEdge == 1) {
+          uint64_t Total = Count.CountValue - sumEdgeCount(Count.InEdges);
+          setEdgeCount(Count.InEdges, Total);
+          Changes = true;
+        }
+      }
+    }
+  }
+
+  DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
+  // Assert every BB has a valid counter.
+  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
+  uint64_t FuncMaxCount = FuncEntryCount;
+  for (auto &BB : F) {
+    assert(getBBInfo(&BB).CountValid && "BB count is not valid");
+    uint64_t Count = getBBInfo(&BB).CountValue;
+    if (Count > FuncMaxCount)
+      FuncMaxCount = Count;
+  }
+  applyFunctionAttributes(FuncEntryCount, FuncMaxCount);
+
+  DEBUG(FuncInfo.dumpInfo("after reading profile."));
+}
+
+// Assign the scaled count values to BBs with multiple out edges.
+void PGOUseFunc::setBranchWeights() { + // Generate MD_prof metadata for every branch instruction. + DEBUG(dbgs() << "\nSetting branch weights.\n"); + MDBuilder MDB(M->getContext()); + for (auto &BB : F) { + TerminatorInst *TI = BB.getTerminator(); + if (TI->getNumSuccessors() < 2) + continue; + if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) + continue; + if (getBBInfo(&BB).CountValue == 0) + continue; + + // We have a non-zero Branch BB. + const UseBBInfo &BBCountInfo = getBBInfo(&BB); + unsigned Size = BBCountInfo.OutEdges.size(); + SmallVector<unsigned, 2> EdgeCounts(Size, 0); + uint64_t MaxCount = 0; + for (unsigned s = 0; s < Size; s++) { + const PGOUseEdge *E = BBCountInfo.OutEdges[s]; + const BasicBlock *SrcBB = E->SrcBB; + const BasicBlock *DestBB = E->DestBB; + if (DestBB == 0) + continue; + unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); + uint64_t EdgeCount = E->CountValue; + if (EdgeCount > MaxCount) + MaxCount = EdgeCount; + EdgeCounts[SuccNum] = EdgeCount; + } + assert(MaxCount > 0 && "Bad max count"); + uint64_t Scale = calculateCountScale(MaxCount); + SmallVector<unsigned, 4> Weights; + for (const auto &ECI : EdgeCounts) + Weights.push_back(scaleBranchCount(ECI, Scale)); + + TI->setMetadata(llvm::LLVMContext::MD_prof, + MDB.createBranchWeights(Weights)); + DEBUG(dbgs() << "Weight is: "; + for (const auto &W : Weights) { dbgs() << W << " "; } + dbgs() << "\n";); + } +} +} // end anonymous namespace + +bool PGOInstrumentationGen::runOnModule(Module &M) { + for (auto &F : M) { + if (F.isDeclaration()) + continue; + BranchProbabilityInfo *BPI = + &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI()); + BlockFrequencyInfo *BFI = + &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI()); + instrumentOneFunc(F, &M, BPI, BFI); + } + return true; +} + +static void setPGOCountOnFunc(PGOUseFunc &Func, + IndexedInstrProfReader *PGOReader) { + if (Func.readCounters(PGOReader)) { + Func.populateCounters(); + Func.setBranchWeights(); + } +} + +bool PGOInstrumentationUse::runOnModule(Module &M) { + DEBUG(dbgs() << "Read in profile counters: "); + auto &Ctx = M.getContext(); + // Read the counter array from file. 
+ auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName); + if (std::error_code EC = ReaderOrErr.getError()) { + Ctx.diagnose( + DiagnosticInfoPGOProfile(ProfileFileName.data(), EC.message())); + return false; + } + + PGOReader = std::move(ReaderOrErr.get()); + if (!PGOReader) { + Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), + "Cannot get PGOReader")); + return false; + } + + for (auto &F : M) { + if (F.isDeclaration()) + continue; + BranchProbabilityInfo *BPI = + &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI()); + BlockFrequencyInfo *BFI = + &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI()); + PGOUseFunc Func(F, &M, BPI, BFI); + setPGOCountOnFunc(Func, PGOReader.get()); + } + return true; +} diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp index 6b185a2..abed465 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp @@ -18,8 +18,9 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -37,6 +38,8 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_os_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -44,6 +47,17 @@ using namespace llvm; #define DEBUG_TYPE "safestack" +enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP }; + +static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage", + cl::Hidden, cl::init(ThreadLocalUSP), + cl::desc("Type of storage for the unsafe stack pointer"), + cl::values(clEnumValN(ThreadLocalUSP, "thread-local", + "Thread-local storage"), + clEnumValN(SingleThreadUSP, "single-thread", + "Non-thread-local storage"), + clEnumValEnd)); + namespace llvm { STATISTIC(NumFunctions, "Total number of functions"); @@ -54,118 +68,48 @@ STATISTIC(NumUnsafeStackRestorePointsFunctions, STATISTIC(NumAllocas, "Total number of allocas"); STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas"); STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas"); +STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments"); STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads"); } // namespace llvm namespace { -/// Check whether a given alloca instruction (AI) should be put on the safe -/// stack or not. The function analyzes all uses of AI and checks whether it is -/// only accessed in a memory safe way (as decided statically). -bool IsSafeStackAlloca(const AllocaInst *AI) { - // Go through all uses of this alloca and check whether all accesses to the - // allocated object are statically known to be memory safe and, hence, the - // object can be placed on the safe stack. - - SmallPtrSet<const Value *, 16> Visited; - SmallVector<const Instruction *, 8> WorkList; - WorkList.push_back(AI); +/// Rewrite an SCEV expression for a memory access address to an expression that +/// represents offset from the given alloca. 
+/// +/// The implementation simply replaces all mentions of the alloca with zero. +class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> { + const Value *AllocaPtr; - // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc. - while (!WorkList.empty()) { - const Instruction *V = WorkList.pop_back_val(); - for (const Use &UI : V->uses()) { - auto I = cast<const Instruction>(UI.getUser()); - assert(V == UI.get()); - - switch (I->getOpcode()) { - case Instruction::Load: - // Loading from a pointer is safe. - break; - case Instruction::VAArg: - // "va-arg" from a pointer is safe. - break; - case Instruction::Store: - if (V == I->getOperand(0)) - // Stored the pointer - conservatively assume it may be unsafe. - return false; - // Storing to the pointee is safe. - break; - - case Instruction::GetElementPtr: - if (!cast<const GetElementPtrInst>(I)->hasAllConstantIndices()) - // GEP with non-constant indices can lead to memory errors. - // This also applies to inbounds GEPs, as the inbounds attribute - // represents an assumption that the address is in bounds, rather than - // an assertion that it is. - return false; - - // We assume that GEP on static alloca with constant indices is safe, - // otherwise a compiler would detect it and warn during compilation. - - if (!isa<const ConstantInt>(AI->getArraySize())) - // However, if the array size itself is not constant, the access - // might still be unsafe at runtime. - return false; - - /* fallthrough */ - - case Instruction::BitCast: - case Instruction::IntToPtr: - case Instruction::PHI: - case Instruction::PtrToInt: - case Instruction::Select: - // The object can be safe or not, depending on how the result of the - // instruction is used. - if (Visited.insert(I).second) - WorkList.push_back(cast<const Instruction>(I)); - break; - - case Instruction::Call: - case Instruction::Invoke: { - // FIXME: add support for memset and memcpy intrinsics. - ImmutableCallSite CS(I); - - // LLVM 'nocapture' attribute is only set for arguments whose address - // is not stored, passed around, or used in any other non-trivial way. - // We assume that passing a pointer to an object as a 'nocapture' - // argument is safe. - // FIXME: a more precise solution would require an interprocedural - // analysis here, which would look at all uses of an argument inside - // the function being called. - ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) - if (A->get() == V && !CS.doesNotCapture(A - B)) - // The parameter is not marked 'nocapture' - unsafe. - return false; - continue; - } +public: + AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr) + : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {} - default: - // The object is unsafe if it is used in any other way. - return false; - } - } + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (Expr->getValue() == AllocaPtr) + return SE.getZero(Expr->getType()); + return Expr; } +}; - // All uses of the alloca are safe, we can place it on the safe stack. - return true; -} - -/// The SafeStack pass splits the stack of each function into the -/// safe stack, which is only accessed through memory safe dereferences -/// (as determined statically), and the unsafe stack, which contains all -/// local variables that are accessed in unsafe ways. 
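// The effect of AllocaOffsetRewriter is easiest to see on a concrete access.
// For a[i], where 'a' is the alloca under analysis, SCEV gives the address
// roughly as (illustrative notation, not actual pass output):
//
//     (%a + (4 * %i))        // SCEVAddExpr of SCEVUnknown(%a) and 4 * %i
//
// After visitUnknown() maps %a to zero, only the offset survives:
//
//     (4 * %i)               // offset from the start of the object
//
// IsAccessSafe() below can then ask whether the value range of this offset,
// widened by the access size, stays inside [0, AllocaSize).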
+/// The SafeStack pass splits the stack of each function into the safe
+/// stack, which is only accessed through memory safe dereferences (as
+/// determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in ways that we can't prove to
+/// be safe.
 class SafeStack : public FunctionPass {
+  const TargetMachine *TM;
+  const TargetLoweringBase *TL;
   const DataLayout *DL;
+  ScalarEvolution *SE;
 
   Type *StackPtrTy;
   Type *IntPtrTy;
   Type *Int32Ty;
   Type *Int8Ty;
 
-  Constant *UnsafeStackPtr = nullptr;
+  Value *UnsafeStackPtr = nullptr;
 
   /// Unsafe stack alignment. Each stack frame must ensure that the stack is
   /// aligned to this value. We need to re-align the unsafe stack if the
@@ -175,26 +119,31 @@ class SafeStack : public FunctionPass {
   /// might expect to appear on the stack on most common targets.
   enum { StackAlignment = 16 };
 
-  /// \brief Build a constant representing a pointer to the unsafe stack
-  /// pointer.
-  Constant *getOrCreateUnsafeStackPtr(Module &M);
+  /// \brief Build a value representing a pointer to the unsafe stack pointer.
+  Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
 
   /// \brief Find all static allocas, dynamic allocas, return instructions and
   /// stack restore points (exception unwind blocks and setjmp calls) in the
   /// given function and append them to the respective vectors.
   void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
                  SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+                 SmallVectorImpl<Argument *> &ByValArguments,
                  SmallVectorImpl<ReturnInst *> &Returns,
                  SmallVectorImpl<Instruction *> &StackRestorePoints);
 
+  /// \brief Calculate the allocation size of a given alloca. Returns 0 if the
+  /// size cannot be statically determined.
+  uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
+
   /// \brief Allocate space for all static allocas in \p StaticAllocas,
   /// replace allocas with pointers into the unsafe stack and generate code to
   /// restore the stack pointer before all return instructions in \p Returns.
   ///
   /// \returns A pointer to the top of the unsafe stack after all unsafe static
   /// allocas are allocated.
-  Value *moveStaticAllocasToUnsafeStack(Function &F,
+  Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
                                         ArrayRef<AllocaInst *> StaticAllocas,
+                                        ArrayRef<Argument *> ByValArguments,
                                         ArrayRef<ReturnInst *> Returns);
 
   /// \brief Generate code to restore the stack after all stack restore points
@@ -203,7 +152,7 @@ class SafeStack : public FunctionPass {
   /// \returns A local variable in which to maintain the dynamic top of the
   /// unsafe stack if needed.
   AllocaInst *
-  createStackRestorePoints(Function &F,
+  createStackRestorePoints(IRBuilder<> &IRB, Function &F,
                            ArrayRef<Instruction *> StackRestorePoints,
                            Value *StaticTop, bool NeedDynamicTop);
 
@@ -214,17 +163,26 @@ class SafeStack : public FunctionPass {
                                        AllocaInst *DynamicTop,
                                        ArrayRef<AllocaInst *> DynamicAllocas);
 
+  bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize);
+
+  bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+                          const Value *AllocaPtr, uint64_t AllocaSize);
+  bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
+                    uint64_t AllocaSize);
+
 public:
   static char ID; // Pass identification, replacement for typeid.
-  SafeStack() : FunctionPass(ID), DL(nullptr) {
+  SafeStack(const TargetMachine *TM)
+      : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
     initializeSafeStackPass(*PassRegistry::getPassRegistry());
   }
+  SafeStack() : SafeStack(nullptr) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<AliasAnalysis>();
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ScalarEvolutionWrapperPass>();
   }
 
-  virtual bool doInitialization(Module &M) {
+  bool doInitialization(Module &M) override {
     DL = &M.getDataLayout();
 
     StackPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -235,51 +193,203 @@ public:
     return false;
   }
 
-  bool runOnFunction(Function &F);
-
+  bool runOnFunction(Function &F) override;
 }; // class SafeStack
 
-Constant *SafeStack::getOrCreateUnsafeStackPtr(Module &M) {
-  // The unsafe stack pointer is stored in a global variable with a magic name.
-  const char *kUnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
+  uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+  if (AI->isArrayAllocation()) {
+    auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+    if (!C)
+      return 0;
+    Size *= C->getZExtValue();
+  }
+  return Size;
+}
+
+bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
+                             const Value *AllocaPtr, uint64_t AllocaSize) {
+  AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
+  const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
+
+  uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
+  ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+  ConstantRange SizeRange =
+      ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
+  ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+  ConstantRange AllocaRange =
+      ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
+  bool Safe = AllocaRange.contains(AccessRange);
+
+  DEBUG(dbgs() << "[SafeStack] "
+               << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+               << *AllocaPtr << "\n"
+               << "  Access " << *Addr << "\n"
+               << "  SCEV " << *Expr
+               << " U: " << SE->getUnsignedRange(Expr)
+               << ", S: " << SE->getSignedRange(Expr) << "\n"
+               << "  Range " << AccessRange << "\n"
+               << "  AllocaRange " << AllocaRange << "\n"
+               << "  " << (Safe ? "safe" : "unsafe") << "\n");
+
+  return Safe;
+}
+
+bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+                                   const Value *AllocaPtr,
+                                   uint64_t AllocaSize) {
+  // All MemIntrinsics have destination address in Arg0 and size in Arg2.
+  if (MI->getRawDest() != U) return true;
+  const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+  // Non-constant size => unsafe. FIXME: try SCEV getRange.
+  if (!Len) return false;
+  return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize);
+}
+
+/// Check whether a given allocation can be kept on the safe
+/// stack or must be moved to the unsafe stack. The function analyzes all uses
+/// of AllocaPtr and checks whether it is only accessed in a memory safe way
+/// (as decided statically).
+bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
+  // Go through all uses of this alloca and check whether all accesses to the
+  // allocated object are statically known to be memory safe and, hence, the
+  // object can be placed on the safe stack.
+  SmallPtrSet<const Value *, 16> Visited;
+  SmallVector<const Value *, 8> WorkList;
+  WorkList.push_back(AllocaPtr);
+
+  // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
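// Stripped of the SCEV machinery, the containment test at the heart of
// IsAccessSafe() above is plain interval arithmetic: the access touches
// [Start, Start + AccessSize) for every start offset the rewritten
// expression may take, and must stay inside [0, AllocaSize). A minimal
// sketch with an explicit upper bound standing in for ConstantRange (the
// unsigned range's lower bound is >= 0 by construction):
#include <cstdint>

bool sketchIsAccessSafe(uint64_t MaxStartOffset, uint64_t AccessSize,
                        uint64_t AllocaSize) {
  // Reject overflow explicitly; ConstantRange handles wraparound internally.
  if (MaxStartOffset + AccessSize < MaxStartOffset)
    return false;
  return MaxStartOffset + AccessSize <= AllocaSize;
}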
+ while (!WorkList.empty()) { + const Value *V = WorkList.pop_back_val(); + for (const Use &UI : V->uses()) { + auto I = cast<const Instruction>(UI.getUser()); + assert(V == UI.get()); + + switch (I->getOpcode()) { + case Instruction::Load: { + if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr, + AllocaSize)) + return false; + break; + } + case Instruction::VAArg: + // "va-arg" from a pointer is safe. + break; + case Instruction::Store: { + if (V == I->getOperand(0)) { + // Stored the pointer - conservatively assume it may be unsafe. + DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n store of address: " << *I << "\n"); + return false; + } + + if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()), + AllocaPtr, AllocaSize)) + return false; + break; + } + case Instruction::Ret: { + // Information leak. + return false; + } + + case Instruction::Call: + case Instruction::Invoke: { + ImmutableCallSite CS(I); + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + } + + if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) { + DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n unsafe memintrinsic: " << *I + << "\n"); + return false; + } + continue; + } + // LLVM 'nocapture' attribute is only set for arguments whose address + // is not stored, passed around, or used in any other non-trivial way. + // We assume that passing a pointer to an object as a 'nocapture + // readnone' argument is safe. + // FIXME: a more precise solution would require an interprocedural + // analysis here, which would look at all uses of an argument inside + // the function being called. + ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); + for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) + if (A->get() == V) + if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || + CS.doesNotAccessMemory()))) { + DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n unsafe call: " << *I << "\n"); + return false; + } + continue; + } + + default: + if (Visited.insert(I).second) + WorkList.push_back(cast<const Instruction>(I)); + } + } + } + + // All uses of the alloca are safe, we can place it on the safe stack. + return true; +} + +Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) { + // Check if there is a target-specific location for the unsafe stack pointer. + if (TL) + if (Value *V = TL->getSafeStackPointerLocation(IRB)) + return V; + + // Otherwise, assume the target links with compiler-rt, which provides a + // thread-local variable with a magic name. + Module &M = *F.getParent(); + const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr"; auto UnsafeStackPtr = - dyn_cast_or_null<GlobalVariable>(M.getNamedValue(kUnsafeStackPtrVar)); + dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar)); + + bool UseTLS = USPStorage == ThreadLocalUSP; if (!UnsafeStackPtr) { + auto TLSModel = UseTLS ? + GlobalValue::InitialExecTLSModel : + GlobalValue::NotThreadLocal; // The global variable is not defined yet, define it ourselves. - // We use the initial-exec TLS model because we do not support the variable - // living anywhere other than in the main executable. 
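// In source terms, the use-walk in IsSafeStackAlloca() classifies locals
// roughly like this (illustrative only; global_ptr is a hypothetical global,
// and real results depend on what ScalarEvolution can prove at the given
// optimization level):
#include <cstring>

extern int *global_ptr; // hypothetical global used to leak an address

void sketch_classify(int n) {
  int a = 0;
  a++;                              // safe: direct load/store, in bounds
  char buf[16];
  std::memset(buf, 0, sizeof(buf)); // safe: constant-size memset in bounds
  buf[n] = 1;                       // unsafe unless n is provably in [0, 16)
  int leak = 0;
  global_ptr = &leak;               // unsafe: address escapes via a store
}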
+ // We use the initial-exec TLS model because we do not support the + // variable living anywhere other than in the main executable. UnsafeStackPtr = new GlobalVariable( - /*Module=*/M, /*Type=*/StackPtrTy, - /*isConstant=*/false, /*Linkage=*/GlobalValue::ExternalLinkage, - /*Initializer=*/0, /*Name=*/kUnsafeStackPtrVar, - /*InsertBefore=*/nullptr, - /*ThreadLocalMode=*/GlobalValue::InitialExecTLSModel); + M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, + UnsafeStackPtrVar, nullptr, TLSModel); } else { // The variable exists, check its type and attributes. - if (UnsafeStackPtr->getValueType() != StackPtrTy) { - report_fatal_error(Twine(kUnsafeStackPtrVar) + " must have void* type"); - } - - if (!UnsafeStackPtr->isThreadLocal()) { - report_fatal_error(Twine(kUnsafeStackPtrVar) + " must be thread-local"); - } + if (UnsafeStackPtr->getValueType() != StackPtrTy) + report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type"); + if (UseTLS != UnsafeStackPtr->isThreadLocal()) + report_fatal_error(Twine(UnsafeStackPtrVar) + " must " + + (UseTLS ? "" : "not ") + "be thread-local"); } - return UnsafeStackPtr; } void SafeStack::findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas, SmallVectorImpl<AllocaInst *> &DynamicAllocas, + SmallVectorImpl<Argument *> &ByValArguments, SmallVectorImpl<ReturnInst *> &Returns, SmallVectorImpl<Instruction *> &StackRestorePoints) { - for (Instruction &I : inst_range(&F)) { + for (Instruction &I : instructions(&F)) { if (auto AI = dyn_cast<AllocaInst>(&I)) { ++NumAllocas; - if (IsSafeStackAlloca(AI)) + uint64_t Size = getStaticAllocaAllocationSize(AI); + if (IsSafeStackAlloca(AI, Size)) continue; if (AI->isStaticAlloca()) { @@ -304,19 +414,26 @@ void SafeStack::findInsts(Function &F, "gcroot intrinsic not compatible with safestack attribute"); } } + for (Argument &Arg : F.args()) { + if (!Arg.hasByValAttr()) + continue; + uint64_t Size = + DL->getTypeStoreSize(Arg.getType()->getPointerElementType()); + if (IsSafeStackAlloca(&Arg, Size)) + continue; + + ++NumUnsafeByValArguments; + ByValArguments.push_back(&Arg); + } } AllocaInst * -SafeStack::createStackRestorePoints(Function &F, +SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, ArrayRef<Instruction *> StackRestorePoints, Value *StaticTop, bool NeedDynamicTop) { if (StackRestorePoints.empty()) return nullptr; - IRBuilder<> IRB(StaticTop - ? cast<Instruction>(StaticTop)->getNextNode() - : (Instruction *)F.getEntryBlock().getFirstInsertionPt()); - // We need the current value of the shadow stack pointer to restore // after longjmp or exception catching. @@ -342,7 +459,7 @@ SafeStack::createStackRestorePoints(Function &F, for (Instruction *I : StackRestorePoints) { ++NumUnsafeStackRestorePoints; - IRB.SetInsertPoint(cast<Instruction>(I->getNextNode())); + IRB.SetInsertPoint(I->getNextNode()); Value *CurrentTop = DynamicTop ? 
IRB.CreateLoad(DynamicTop) : StaticTop; IRB.CreateStore(CurrentTop, UnsafeStackPtr); } @@ -350,14 +467,12 @@ SafeStack::createStackRestorePoints(Function &F, return DynamicTop; } -Value * -SafeStack::moveStaticAllocasToUnsafeStack(Function &F, - ArrayRef<AllocaInst *> StaticAllocas, - ArrayRef<ReturnInst *> Returns) { - if (StaticAllocas.empty()) +Value *SafeStack::moveStaticAllocasToUnsafeStack( + IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas, + ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns) { + if (StaticAllocas.empty() && ByValArguments.empty()) return nullptr; - IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt()); DIBuilder DIB(*F.getParent()); // We explicitly compute and set the unsafe stack layout for all unsafe @@ -377,6 +492,13 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, // Compute maximum alignment among static objects on the unsafe stack. unsigned MaxAlignment = 0; + for (Argument *Arg : ByValArguments) { + Type *Ty = Arg->getType()->getPointerElementType(); + unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), + Arg->getParamAlignment()); + if (Align > MaxAlignment) + MaxAlignment = Align; + } for (AllocaInst *AI : StaticAllocas) { Type *Ty = AI->getAllocatedType(); unsigned Align = @@ -388,22 +510,51 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, if (MaxAlignment > StackAlignment) { // Re-align the base pointer according to the max requested alignment. assert(isPowerOf2_32(MaxAlignment)); - IRB.SetInsertPoint(cast<Instruction>(BasePointer->getNextNode())); + IRB.SetInsertPoint(BasePointer->getNextNode()); BasePointer = cast<Instruction>(IRB.CreateIntToPtr( IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy), ConstantInt::get(IntPtrTy, ~uint64_t(MaxAlignment - 1))), StackPtrTy)); } - // Allocate space for every unsafe static AllocaInst on the unsafe stack. int64_t StaticOffset = 0; // Current stack top. + IRB.SetInsertPoint(BasePointer->getNextNode()); + + for (Argument *Arg : ByValArguments) { + Type *Ty = Arg->getType()->getPointerElementType(); + + uint64_t Size = DL->getTypeStoreSize(Ty); + if (Size == 0) + Size = 1; // Don't create zero-sized stack objects. + + // Ensure the object is properly aligned. + unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), + Arg->getParamAlignment()); + + // Add alignment. + // NOTE: we ensure that BasePointer itself is aligned to >= Align. + StaticOffset += Size; + StaticOffset = RoundUpToAlignment(StaticOffset, Align); + + Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8* + ConstantInt::get(Int32Ty, -StaticOffset)); + Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(), + Arg->getName() + ".unsafe-byval"); + + // Replace alloc with the new location. + replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, + /*Deref=*/true, -StaticOffset); + Arg->replaceAllUsesWith(NewArg); + IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); + IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); + } + + // Allocate space for every unsafe static AllocaInst on the unsafe stack. for (AllocaInst *AI : StaticAllocas) { IRB.SetInsertPoint(AI); - auto CArraySize = cast<ConstantInt>(AI->getArraySize()); Type *Ty = AI->getAllocatedType(); - - uint64_t Size = DL->getTypeAllocSize(Ty) * CArraySize->getZExtValue(); + uint64_t Size = getStaticAllocaAllocationSize(AI); if (Size == 0) Size = 1; // Don't create zero-sized stack objects. 
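// Both the byval-argument loop and the static-alloca loop above lay objects
// out downwards from BasePointer with the same arithmetic: grow the running
// offset by the object size, round up to the object's alignment, and address
// the object at BasePointer - StaticOffset. A worked sketch with
// RoundUpToAlignment spelled out (sizes and alignments illustrative):
#include <cassert>
#include <cstdint>

uint64_t sketchRoundUp(uint64_t Value, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  return (Value + Align - 1) & ~(Align - 1);
}

void sketchLayout() {
  uint64_t StaticOffset = 0;
  // int32_t x: size 4, align 4 -> offset 4; object lives at base - 4.
  StaticOffset = sketchRoundUp(StaticOffset + 4, 4);
  // char b[13]: size 13, align 1 -> offset 17; object lives at base - 17.
  StaticOffset = sketchRoundUp(StaticOffset + 13, 1);
  assert(StaticOffset == 17);
}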
@@ -423,7 +574,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, cast<Instruction>(NewAI)->takeName(AI); // Replace alloc with the new location. - replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -StaticOffset); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } @@ -434,7 +585,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, StaticOffset = RoundUpToAlignment(StaticOffset, StackAlignment); // Update shadow stack pointer in the function epilogue. - IRB.SetInsertPoint(cast<Instruction>(BasePointer->getNextNode())); + IRB.SetInsertPoint(BasePointer->getNextNode()); Value *StaticTop = IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -StaticOffset), @@ -478,7 +629,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (DynamicTop) IRB.CreateStore(NewTop, DynamicTop); - Value *NewAI = IRB.CreateIntToPtr(SP, AI->getType()); + Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType()); if (AI->hasName() && isa<Instruction>(NewAI)) NewAI->takeName(AI); @@ -513,8 +664,6 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( } bool SafeStack::runOnFunction(Function &F) { - auto AA = &getAnalysis<AliasAnalysis>(); - DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); if (!F.hasFnAttribute(Attribute::SafeStack)) { @@ -529,6 +678,9 @@ bool SafeStack::runOnFunction(Function &F) { return false; } + TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr; + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + { // Make sure the regular stack protector won't run on this function // (safestack attribute takes precedence). @@ -541,16 +693,11 @@ bool SafeStack::runOnFunction(Function &F) { AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex, B)); } - if (AA->onlyReadsMemory(&F)) { - // XXX: we don't protect against information leak attacks for now. - DEBUG(dbgs() << "[SafeStack] function only reads memory\n"); - return false; - } - ++NumFunctions; SmallVector<AllocaInst *, 16> StaticAllocas; SmallVector<AllocaInst *, 4> DynamicAllocas; + SmallVector<Argument *, 4> ByValArguments; SmallVector<ReturnInst *, 4> Returns; // Collect all points where stack gets unwound and needs to be restored @@ -562,23 +709,26 @@ bool SafeStack::runOnFunction(Function &F) { // Find all static and dynamic alloca instructions that must be moved to the // unsafe stack, all return instructions and stack restore points. - findInsts(F, StaticAllocas, DynamicAllocas, Returns, StackRestorePoints); + findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns, + StackRestorePoints); if (StaticAllocas.empty() && DynamicAllocas.empty() && - StackRestorePoints.empty()) + ByValArguments.empty() && StackRestorePoints.empty()) return false; // Nothing to do in this function. - if (!StaticAllocas.empty() || !DynamicAllocas.empty()) + if (!StaticAllocas.empty() || !DynamicAllocas.empty() || + !ByValArguments.empty()) ++NumUnsafeStackFunctions; // This function has the unsafe stack. if (!StackRestorePoints.empty()) ++NumUnsafeStackRestorePointsFunctions; - if (!UnsafeStackPtr) - UnsafeStackPtr = getOrCreateUnsafeStackPtr(*F.getParent()); + IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); + UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F); // The top of the unsafe stack after all unsafe static allocas are allocated. 
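// When no target-specific location exists, the pass and the runtime meet at a
// single thread-local pointer. Below is a simplified view of what compiler-rt
// is assumed to provide, plus pseudo-C for the frame code the pass emits
// around a function with unsafe statics (names invented for illustration; the
// real runtime also maps a fresh unsafe stack per thread):
extern "C" {
__attribute__((tls_model("initial-exec")))
__thread void *__safestack_unsafe_stack_ptr;
}

void sketch_frame_shape() {
  enum { FrameSize = 32 }; // total aligned size of this frame's unsafe objects
  // Prologue: save the current unsafe top and carve out this frame.
  char *Base = (char *)__safestack_unsafe_stack_ptr;
  __safestack_unsafe_stack_ptr = Base - FrameSize;
  // Body: each unsafe object is addressed at Base - <its StaticOffset>.
  // Epilogue (and each stack-restore point): put the saved top back.
  __safestack_unsafe_stack_ptr = Base;
}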
- Value *StaticTop = moveStaticAllocasToUnsafeStack(F, StaticAllocas, Returns); + Value *StaticTop = moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, + ByValArguments, Returns); // Safe stack object that stores the current unsafe stack top. It is updated // as unsafe dynamic (non-constant-sized) allocas are allocated and freed. @@ -587,7 +737,7 @@ bool SafeStack::runOnFunction(Function &F) { // FIXME: a better alternative might be to store the unsafe stack pointer // before setjmp / invoke instructions. AllocaInst *DynamicTop = createStackRestorePoints( - F, StackRestorePoints, StaticTop, !DynamicAllocas.empty()); + IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty()); // Handle dynamic allocas. moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop, @@ -597,13 +747,14 @@ bool SafeStack::runOnFunction(Function &F) { return true; } -} // end anonymous namespace +} // anonymous namespace char SafeStack::ID = 0; -INITIALIZE_PASS_BEGIN(SafeStack, "safe-stack", - "Safe Stack instrumentation pass", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(SafeStack, "safe-stack", "Safe Stack instrumentation pass", - false, false) +INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack", + "Safe Stack instrumentation pass", false, false) +INITIALIZE_TM_PASS_END(SafeStack, "safe-stack", + "Safe Stack instrumentation pass", false, false) -FunctionPass *llvm::createSafeStackPass() { return new SafeStack(); } +FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) { + return new SafeStack(TM); +} diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 7a5b4cb..09de7a2 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -31,6 +31,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -59,6 +60,7 @@ static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter"; static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block"; static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp"; +static const char *const kSanCovTraceSwitch = "__sanitizer_cov_trace_switch"; static const char *const kSanCovModuleCtorName = "sancov.module_ctor"; static const uint64_t kSanCtorAndDtorPriority = 2; @@ -148,19 +150,25 @@ class SanitizerCoverageModule : public ModulePass { void InjectCoverageForIndirectCalls(Function &F, ArrayRef<Instruction *> IndirCalls); void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets); + void InjectTraceForSwitch(Function &F, + ArrayRef<Instruction *> SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks); void SetNoSanitizeMetadata(Instruction *I); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls); unsigned NumberOfInstrumentedBlocks() { - return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses(); + return SanCovFunction->getNumUses() + + SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() + + SanCovTraceEnter->getNumUses(); } Function *SanCovFunction; Function *SanCovWithCheckFunction; Function 
*SanCovIndirCallFunction; Function *SanCovTraceEnter, *SanCovTraceBB; Function *SanCovTraceCmpFunction; + Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; - Type *IntptrTy, *Int64Ty; + Type *IntptrTy, *Int64Ty, *Int64PtrTy; + Module *CurModule; LLVMContext *C; const DataLayout *DL; @@ -177,11 +185,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { return false; C = &(M.getContext()); DL = &M.getDataLayout(); + CurModule = &M; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int64Ty = IRB.getInt64Ty(); SanCovFunction = checkSanitizerInterfaceFunction( @@ -194,18 +204,19 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { SanCovTraceCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + SanCovTraceSwitchFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovTraceSwitch, VoidTy, Int64Ty, Int64PtrTy, nullptr)); // We insert an empty inline asm after cov callbacks to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), /*hasSideEffects=*/true); - if (Options.TraceBB) { - SanCovTraceEnter = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); - SanCovTraceBB = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); - } + SanCovTraceEnter = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); + SanCovTraceBB = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); // At this point we create a dummy array of guards because we don't // know how many elements we will need. @@ -280,11 +291,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { if (F.empty()) return false; if (F.getName().find(".module_ctor") != std::string::npos) return false; // Should not instrument sanitizer init functions. + // Don't instrument functions using SEH for now. Splitting basic blocks like + // we do for coverage breaks WinEHPrepare. + // FIXME: Remove this when SEH no longer uses landingpad pattern matching. 
+ if (F.hasPersonalityFn() && + isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + return false; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges(F); SmallVector<Instruction*, 8> IndirCalls; SmallVector<BasicBlock*, 16> AllBlocks; SmallVector<Instruction*, 8> CmpTraceTargets; + SmallVector<Instruction*, 8> SwitchTraceTargets; for (auto &BB : F) { AllBlocks.push_back(&BB); for (auto &Inst : BB) { @@ -293,13 +311,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { if (CS && !CS.getCalledFunction()) IndirCalls.push_back(&Inst); } - if (Options.TraceCmp && isa<ICmpInst>(&Inst)) - CmpTraceTargets.push_back(&Inst); + if (Options.TraceCmp) { + if (isa<ICmpInst>(&Inst)) + CmpTraceTargets.push_back(&Inst); + if (isa<SwitchInst>(&Inst)) + SwitchTraceTargets.push_back(&Inst); + } } } InjectCoverage(F, AllBlocks); InjectCoverageForIndirectCalls(F, IndirCalls); InjectTraceForCmp(F, CmpTraceTargets); + InjectTraceForSwitch(F, SwitchTraceTargets); return true; } @@ -348,6 +371,45 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( } } +// For every switch statement we insert a call: +// __sanitizer_cov_trace_switch(CondValue, +// {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... }) + +void SanitizerCoverageModule::InjectTraceForSwitch( + Function &F, ArrayRef<Instruction *> SwitchTraceTargets) { + for (auto I : SwitchTraceTargets) { + if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { + IRBuilder<> IRB(I); + SmallVector<Constant *, 16> Initializers; + Value *Cond = SI->getCondition(); + if (Cond->getType()->getScalarSizeInBits() > + Int64Ty->getScalarSizeInBits()) + continue; + Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases())); + Initializers.push_back( + ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits())); + if (Cond->getType()->getScalarSizeInBits() < + Int64Ty->getScalarSizeInBits()) + Cond = IRB.CreateIntCast(Cond, Int64Ty, false); + for (auto It: SI->cases()) { + Constant *C = It.getCaseValue(); + if (C->getType()->getScalarSizeInBits() < + Int64Ty->getScalarSizeInBits()) + C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty); + Initializers.push_back(C); + } + ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size()); + GlobalVariable *GV = new GlobalVariable( + *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage, + ConstantArray::get(ArrayOfInt64Ty, Initializers), + "__sancov_gen_cov_switch_values"); + IRB.CreateCall(SanCovTraceSwitchFunction, + {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)}); + } + } +} + + void SanitizerCoverageModule::InjectTraceForCmp( Function &F, ArrayRef<Instruction *> CmpTraceTargets) { for (auto I : CmpTraceTargets) { @@ -369,8 +431,7 @@ void SanitizerCoverageModule::InjectTraceForCmp( void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) { I->setMetadata( - I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); + I->getModule()->getMDKindID("nosanitize"), MDNode::get(*C, None)); } void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, @@ -382,34 +443,31 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, // locations. if (isa<UnreachableInst>(BB.getTerminator())) return; - BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end(); - // Skip static allocas at the top of the entry block so they don't become - // dynamic when we split the block. 
If we used our optimized stack layout, - // then there will only be one alloca and it will come first. - for (; IP != BE; ++IP) { - AllocaInst *AI = dyn_cast<AllocaInst>(IP); - if (!AI || !AI->isStaticAlloca()) - break; - } + BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { if (auto SP = getDISubprogram(&F)) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); + // Keep static allocas and llvm.localescape calls in the entry block. Even + // if we aren't splitting the block, it's nice for allocas to be before + // calls. + IP = PrepareToSplitEntryBlock(BB, IP); } else { EntryLoc = IP->getDebugLoc(); } - IRBuilder<> IRB(IP); + IRBuilder<> IRB(&*IP); IRB.SetCurrentDebugLocation(EntryLoc); - SmallVector<Value *, 1> Indices; Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (UseCalls) { + if (Options.TraceBB) { + IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); + } else if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); @@ -418,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, SetNoSanitizeMetadata(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( - Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); + Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. @@ -427,7 +485,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } if (Options.Use8bitCounters) { - IRB.SetInsertPoint(IP); + IRB.SetInsertPoint(&*IP); Value *P = IRB.CreateAdd( IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); @@ -438,13 +496,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, SetNoSanitizeMetadata(LI); SetNoSanitizeMetadata(SI); } - - if (Options.TraceBB) { - // Experimental support for tracing. - // Insert a callback with the same guard variable as used for coverage. - IRB.SetInsertPoint(IP); - IRB.CreateCall(IsEntryBB ? 
SanCovTraceEnter : SanCovTraceBB, GuardP); - } } char SanitizerCoverageModule::ID = 0; diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 1a46bbb..9331e1d 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -142,37 +142,35 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr)); OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { - const size_t ByteSize = 1 << i; - const size_t BitSize = ByteSize * 8; - SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); + const unsigned ByteSize = 1U << i; + const unsigned BitSize = ByteSize * 8; + std::string ByteSizeStr = utostr(ByteSize); + std::string BitSizeStr = utostr(BitSize); + SmallString<32> ReadName("__tsan_read" + ByteSizeStr); TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); + SmallString<32> WriteName("__tsan_write" + ByteSizeStr); TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<64> UnalignedReadName("__tsan_unaligned_read" + - itostr(ByteSize)); + SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr); TsanUnalignedRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + - itostr(ByteSize)); + SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr); TsanUnalignedWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); - SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) + - "_load"); + SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load"); TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); - SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) + - "_store"); + SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store"); TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr)); @@ -201,7 +199,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); } - SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + + SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr + "_compare_exchange_val"); TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr)); @@ -513,8 +511,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -527,8 +525,8 
@@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -544,8 +542,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; if (!F) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -558,8 +556,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), |
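// The __tsan_* callbacks above are all selected by access size: the arrays
// TsanRead/TsanWrite/TsanAtomicLoad/... are indexed by log2 of the byte size,
// which is what getMemoryAccessFuncIndex() (defined earlier in the file)
// computes. A minimal sketch of that mapping, assuming the same
// kNumberOfAccessSizes = 5 size classes (1, 2, 4, 8, 16 bytes):
#include <cstdint>

int sketchAccessFuncIndex(uint32_t ByteSize) {
  switch (ByteSize) {
  case 1:  return 0;  // __tsan_read1 / __tsan_write1 / __tsan_atomic8_*
  case 2:  return 1;  // 16-bit variants
  case 4:  return 2;  // 32-bit variants
  case 8:  return 3;  // 64-bit variants
  case 16: return 4;  // 128-bit variants
  default: return -1; // size not instrumented
  }
}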