diff options
Diffstat (limited to 'lib/Transforms/Instrumentation')
-rw-r--r-- | lib/Transforms/Instrumentation/AddressSanitizer.cpp | 437 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/BlackList.cpp | 105 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/BlackList.h | 57 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/BoundsChecking.cpp | 16 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/FunctionBlackList.cpp | 79 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/FunctionBlackList.h | 37 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/GCOVProfiling.cpp | 73 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/MaximumSpanningTree.h | 53 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 137 |
10 files changed, 624 insertions, 372 deletions
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 17b83ce..b7be462 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -15,7 +15,7 @@ #define DEBUG_TYPE "asan" -#include "FunctionBlackList.h" +#include "BlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" @@ -35,7 +35,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -61,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; +static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; +static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *kAsanInitName = "__asan_init"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; @@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", // This flag may need to be replaced with -f[no]asan-globals. static cl::opt<bool> ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInitializers("asan-initialization-order", + cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt<bool> ClMemIntrin("asan-memintrin", cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); // This flag may need to be replaced with -fasan-blacklist. @@ -144,41 +148,33 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), cl::Hidden, cl::init(-1)); namespace { - -/// An object of this type is created while instrumenting every function. -struct AsanFunctionContext { - AsanFunctionContext(Function &Function) : F(Function) { } - - Function &F; -}; - /// AddressSanitizer: instrument the code in module to find memory bugs. -struct AddressSanitizer : public ModulePass { +struct AddressSanitizer : public FunctionPass { AddressSanitizer(); virtual const char *getPassName() const; - void instrumentMop(AsanFunctionContext &AFC, Instruction *I); - void instrumentAddress(AsanFunctionContext &AFC, - Instruction *OrigIns, IRBuilder<> &IRB, + void instrumentMop(Instruction *I); + void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex); - bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI); - void instrumentMemIntrinsicParam(AsanFunctionContext &AFC, - Instruction *OrigIns, Value *Addr, + bool instrumentMemIntrinsic(MemIntrinsic *MI); + void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); - bool handleFunction(Module &M, Function &F); + bool runOnFunction(Function &F); + void createInitializerPoisonCalls(Module &M, + Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); - bool poisonStackInFunction(Module &M, Function &F); - virtual bool runOnModule(Module &M); + bool poisonStackInFunction(Function &F); + virtual bool doInitialization(Module &M); + virtual bool doFinalization(Module &M); bool insertGlobalRedzones(Module &M); static char ID; // Pass identification, replacement for typeid private: - uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); @@ -194,12 +190,15 @@ struct AddressSanitizer : public ModulePass { } Function *checkInterfaceFunction(Constant *FuncOrBitcast); + bool ShouldInstrumentGlobal(GlobalVariable *G); void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison); bool LooksLikeCodeInBug11395(Instruction *I); + void FindDynamicInitializers(Module &M); + bool HasDynamicInitializer(GlobalVariable *G); LLVMContext *C; - TargetData *TD; + DataLayout *TD; uint64_t MappingOffset; int MappingScale; size_t RedzoneSize; @@ -208,11 +207,15 @@ struct AddressSanitizer : public ModulePass { Type *IntptrPtrTy; Function *AsanCtorFunction; Function *AsanInitFunction; + Function *AsanStackMallocFunc, *AsanStackFreeFunc; + Function *AsanHandleNoReturnFunc; Instruction *CtorInsertBefore; - OwningPtr<FunctionBlackList> BL; + OwningPtr<BlackList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; + SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals; + SmallSet<GlobalValue*, 32> GlobalsCreatedByAsan; }; } // namespace @@ -221,8 +224,8 @@ char AddressSanitizer::ID = 0; INITIALIZE_PASS(AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) -AddressSanitizer::AddressSanitizer() : ModulePass(ID) { } -ModulePass *llvm::createAddressSanitizerPass() { +AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { } +FunctionPass *llvm::createAddressSanitizerPass() { return new AddressSanitizer(); } @@ -243,38 +246,6 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { GlobalValue::PrivateLinkage, StrConst, ""); } -// Split the basic block and insert an if-then code. -// Before: -// Head -// Cmp -// Tail -// After: -// Head -// if (Cmp) -// ThenBlock -// Tail -// -// ThenBlock block is created and its terminator is returned. -// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise -// it is terminated with BranchInst to Tail. -static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) { - Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode(); - BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); - TerminatorInst *HeadOldTerm = Head->getTerminator(); - LLVMContext &C = Head->getParent()->getParent()->getContext(); - BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); - TerminatorInst *CheckTerm; - if (Unreachable) - CheckTerm = new UnreachableInst(C, ThenBlock); - else - CheckTerm = BranchInst::Create(Tail, ThenBlock); - BranchInst *HeadNewTerm = - BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); - ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - return CheckTerm; -} - Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, MappingScale); @@ -286,12 +257,12 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { } void AddressSanitizer::instrumentMemIntrinsicParam( - AsanFunctionContext &AFC, Instruction *OrigIns, + Instruction *OrigIns, Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) { // Check the first byte. { IRBuilder<> IRB(InsertBefore); - instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite); + instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite); } // Check the last byte. { @@ -301,13 +272,12 @@ void AddressSanitizer::instrumentMemIntrinsicParam( SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne); - instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); + instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); } } // Instrument memset/memmove/memcpy -bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, - MemIntrinsic *MI) { +bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { Value *Dst = MI->getDest(); MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI); Value *Src = MemTran ? MemTran->getSource() : 0; @@ -323,12 +293,12 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, Value *Cmp = IRB.CreateICmpNE(Length, Constant::getNullValue(Length->getType())); - InsertBefore = splitBlockAndInsertIfThen(Cmp, false); + InsertBefore = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false); } - instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true); + instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true); if (Src) - instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false); + instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false); return true; } @@ -358,14 +328,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { return NULL; } -void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { - bool IsWrite; +void AddressSanitizer::FindDynamicInitializers(Module& M) { + // Clang generates metadata identifying all dynamically initialized globals. + NamedMDNode *DynamicGlobals = + M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); + if (!DynamicGlobals) + return; + for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { + MDNode *MDN = DynamicGlobals->getOperand(i); + assert(MDN->getNumOperands() == 1); + Value *VG = MDN->getOperand(0); + // The optimizer may optimize away a global entirely, in which case we + // cannot instrument access to it. + if (!VG) + continue; + + GlobalVariable *G = cast<GlobalVariable>(VG); + DynamicallyInitializedGlobals.insert(G); + } +} +// Returns true if a global variable is initialized dynamically in this TU. +bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) { + return DynamicallyInitializedGlobals.count(G); +} + +void AddressSanitizer::instrumentMop(Instruction *I) { + bool IsWrite = false; Value *Addr = isInterestingMemoryAccess(I, &IsWrite); assert(Addr); - if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) { - // We are accessing a global scalar variable. Nothing to catch here. - return; + if (ClOpt && ClOptGlobals) { + if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { + // If initialization order checking is disabled, a simple access to a + // dynamically initialized global is always valid. + if (!ClInitializers) + return; + // If a global variable does not have dynamic initialization we don't + // have to instrument it. However, if a global has external linkage, we + // assume it has dynamic initialization, as it may have an initializer + // in a different TU. + if (G->getLinkage() != GlobalVariable::ExternalLinkage && + !HasDynamicInitializer(G)) + return; + } } + Type *OrigPtrTy = Addr->getType(); Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); @@ -379,7 +385,7 @@ void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { } IRBuilder<> IRB(I); - instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite); + instrumentAddress(I, IRB, Addr, TypeSize, IsWrite); } // Validate the result of Module::getOrInsertFunction called for an interface @@ -424,8 +430,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); } -void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, - Instruction *OrigIns, +void AddressSanitizer::instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite) { Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); @@ -444,17 +449,19 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, TerminatorInst *CrashTerm = 0; if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { - TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false); + TerminatorInst *CheckTerm = + SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false); assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); - BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB); + BasicBlock *CrashBlock = + BasicBlock::Create(*C, "", NextBB->getParent(), NextBB); CrashTerm = new UnreachableInst(*C, CrashBlock); BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); ReplaceInstWithInst(CheckTerm, NewTerm); } else { - CrashTerm = splitBlockAndInsertIfThen(Cmp, true); + CrashTerm = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), true); } Instruction *Crash = @@ -462,68 +469,108 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, Crash->setDebugLoc(OrigIns->getDebugLoc()); } +void AddressSanitizer::createInitializerPoisonCalls(Module &M, + Value *FirstAddr, + Value *LastAddr) { + // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. + Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); + // If that function is not present, this TU contains no globals, or they have + // all been optimized away + if (!GlobalInit) + return; + + // Set up the arguments to our poison/unpoison functions. + IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); + + // Declare our poisoning and unpoisoning functions. + Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); + Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); + AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); + + // Add a call to poison all external globals before the given function starts. + IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + + // Add calls to unpoison all globals before each return instruction. + for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); + I != E; ++I) { + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) { + CallInst::Create(AsanUnpoisonGlobals, "", RI); + } + } +} + +bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { + Type *Ty = cast<PointerType>(G->getType())->getElementType(); + DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); + + if (BL->isIn(*G)) return false; + if (!Ty->isSized()) return false; + if (!G->hasInitializer()) return false; + if (GlobalsCreatedByAsan.count(G)) return false; // Our own global. + // Touch only those globals that will not be defined in other modules. + // Don't handle ODR type linkages since other modules may be built w/o asan. + if (G->getLinkage() != GlobalVariable::ExternalLinkage && + G->getLinkage() != GlobalVariable::PrivateLinkage && + G->getLinkage() != GlobalVariable::InternalLinkage) + return false; + // Two problems with thread-locals: + // - The address of the main thread's copy can't be computed at link-time. + // - Need to poison all copies, not just the main thread's one. + if (G->isThreadLocal()) + return false; + // For now, just ignore this Alloca if the alignment is large. + if (G->getAlignment() > RedzoneSize) return false; + + // Ignore all the globals with the names starting with "\01L_OBJC_". + // Many of those are put into the .cstring section. The linker compresses + // that section by removing the spare \0s after the string terminator, so + // our redzones get broken. + if ((G->getName().find("\01L_OBJC_") == 0) || + (G->getName().find("\01l_OBJC_") == 0)) { + DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); + return false; + } + + if (G->hasSection()) { + StringRef Section(G->getSection()); + // Ignore the globals from the __OBJC section. The ObjC runtime assumes + // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to + // them. + if ((Section.find("__OBJC,") == 0) || + (Section.find("__DATA, __objc_") == 0)) { + DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); + return false; + } + // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 + // Constant CFString instances are compiled in the following way: + // -- the string buffer is emitted into + // __TEXT,__cstring,cstring_literals + // -- the constant NSConstantString structure referencing that buffer + // is placed into __DATA,__cfstring + // Therefore there's no point in placing redzones into __DATA,__cfstring. + // Moreover, it causes the linker to crash on OS X 10.7 + if (Section.find("__DATA,__cfstring") == 0) { + DEBUG(dbgs() << "Ignoring CFString: " << *G); + return false; + } + } + + return true; +} + // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. bool AddressSanitizer::insertGlobalRedzones(Module &M) { SmallVector<GlobalVariable *, 16> GlobalsToChange; - for (Module::GlobalListType::iterator G = M.getGlobalList().begin(), - E = M.getGlobalList().end(); G != E; ++G) { - Type *Ty = cast<PointerType>(G->getType())->getElementType(); - DEBUG(dbgs() << "GLOBAL: " << *G); - - if (!Ty->isSized()) continue; - if (!G->hasInitializer()) continue; - // Touch only those globals that will not be defined in other modules. - // Don't handle ODR type linkages since other modules may be built w/o asan. - if (G->getLinkage() != GlobalVariable::ExternalLinkage && - G->getLinkage() != GlobalVariable::PrivateLinkage && - G->getLinkage() != GlobalVariable::InternalLinkage) - continue; - // Two problems with thread-locals: - // - The address of the main thread's copy can't be computed at link-time. - // - Need to poison all copies, not just the main thread's one. - if (G->isThreadLocal()) - continue; - // For now, just ignore this Alloca if the alignment is large. - if (G->getAlignment() > RedzoneSize) continue; - - // Ignore all the globals with the names starting with "\01L_OBJC_". - // Many of those are put into the .cstring section. The linker compresses - // that section by removing the spare \0s after the string terminator, so - // our redzones get broken. - if ((G->getName().find("\01L_OBJC_") == 0) || - (G->getName().find("\01l_OBJC_") == 0)) { - DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); - continue; - } - - if (G->hasSection()) { - StringRef Section(G->getSection()); - // Ignore the globals from the __OBJC section. The ObjC runtime assumes - // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to - // them. - if ((Section.find("__OBJC,") == 0) || - (Section.find("__DATA, __objc_") == 0)) { - DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); - continue; - } - // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 - // Constant CFString instances are compiled in the following way: - // -- the string buffer is emitted into - // __TEXT,__cstring,cstring_literals - // -- the constant NSConstantString structure referencing that buffer - // is placed into __DATA,__cfstring - // Therefore there's no point in placing redzones into __DATA,__cfstring. - // Moreover, it causes the linker to crash on OS X 10.7 - if (Section.find("__DATA,__cfstring") == 0) { - DEBUG(dbgs() << "Ignoring CFString: " << *G); - continue; - } - } - - GlobalsToChange.push_back(G); + for (Module::GlobalListType::iterator G = M.global_begin(), + E = M.global_end(); G != E; ++G) { + if (ShouldInstrumentGlobal(G)) + GlobalsToChange.push_back(G); } size_t n = GlobalsToChange.size(); @@ -534,13 +581,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, - IntptrTy, IntptrTy, NULL); - SmallVector<Constant *, 16> Initializers(n); + IntptrTy, IntptrTy, + IntptrTy, NULL); + SmallVector<Constant *, 16> Initializers(n), DynamicInit; IRBuilder<> IRB(CtorInsertBefore); + if (ClInitializers) + FindDynamicInitializers(M); + + // The addresses of the first and last dynamically initialized globals in + // this TU. Used in initialization order checking. + Value *FirstDynamic = 0, *LastDynamic = 0; + for (size_t i = 0; i < n; i++) { GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast<PointerType>(G->getType()); @@ -549,6 +605,10 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { uint64_t RightRedzoneSize = RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize)); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); + // Determine whether this global should be poisoned in initialization. + bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); + // Don't check initialization order if this global is blacklisted. + GlobalHasDynamicInitializer &= !BL->isInInit(*G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -583,8 +643,17 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); - DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal); + + // Populate the first and last globals declared in this TU. + if (ClInitializers && GlobalHasDynamicInitializer) { + LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); + if (FirstDynamic == 0) + FirstDynamic = LastDynamic; + } + + DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); } ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n); @@ -592,8 +661,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage, ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); + // Create calls for poisoning before initializers run and unpoisoning after. + if (ClInitializers && FirstDynamic && LastDynamic) + createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); + Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + kAsanRegisterGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, NULL)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); IRB.CreateCall2(AsanRegisterGlobals, @@ -623,12 +697,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { } // virtual -bool AddressSanitizer::runOnModule(Module &M) { +bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. - TD = getAnalysisIfAvailable<TargetData>(); + TD = getAnalysisIfAvailable<DataLayout>(); + if (!TD) return false; - BL.reset(new FunctionBlackList(ClBlackListFile)); + BL.reset(new BlackList(ClBlackListFile)); C = &(M.getContext()); LongSize = TD->getPointerSizeInBits(); @@ -656,17 +731,27 @@ bool AddressSanitizer::runOnModule(Module &M) { std::string FunctionName = std::string(kAsanReportErrorTemplate) + (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); // If we are merging crash callbacks, they have two parameters. - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( - M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); + AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = + checkInterfaceFunction(M.getOrInsertFunction( + FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); } } + + AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL)); + AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanStackFreeName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy, NULL)); + AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( + kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); + // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), /*hasSideEffects=*/true); llvm::Triple targetTriple(M.getTargetTriple()); - bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI; + bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid : (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64); @@ -686,10 +771,6 @@ bool AddressSanitizer::runOnModule(Module &M) { // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. RedzoneSize = std::max(32, (int)(1 << MappingScale)); - bool Res = false; - - if (ClGlobals) - Res |= insertGlobalRedzones(M); if (ClMappingOffsetLog >= 0) { // Tell the run-time the current values of mapping offset and scale. @@ -709,17 +790,20 @@ bool AddressSanitizer::runOnModule(Module &M) { IRB.CreateLoad(asan_mapping_scale, true); } - - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - Res |= handleFunction(M, *F); - } - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); - return Res; + return true; +} + +bool AddressSanitizer::doFinalization(Module &M) { + // We transform the globals at the very end so that the optimization analysis + // works on the original globals. + if (ClGlobals) + return insertGlobalRedzones(M); + return false; } + bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. @@ -736,19 +820,22 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { return false; } -bool AddressSanitizer::handleFunction(Module &M, Function &F) { +bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); // If needed, insert __asan_init before checking for AddressSafety attr. maybeInsertAsanInitAtFunctionEntry(F); - if (!F.hasFnAttr(Attribute::AddressSafety)) return false; + if (!F.getFnAttributes().hasAttribute(Attributes::AddressSafety)) + return false; if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; - // We want to instrument every address only once per basic block - // (unless there are calls between uses). + + // We want to instrument every address only once per basic block (unless there + // are calls between uses). SmallSet<Value*, 16> TempsToInstrument; SmallVector<Instruction*, 16> ToInstrument; SmallVector<Instruction*, 8> NoReturnCalls; @@ -786,8 +873,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { } } - AsanFunctionContext AFC(F); - // Instrument. int NumInstrumented = 0; for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { @@ -795,25 +880,23 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { if (isInterestingMemoryAccess(Inst, &IsWrite)) - instrumentMop(AFC, Inst); + instrumentMop(Inst); else - instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst)); + instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); } NumInstrumented++; } - DEBUG(dbgs() << F); - - bool ChangedStack = poisonStackInFunction(M, F); + bool ChangedStack = poisonStackInFunction(F); // We must unpoison the stack before every NoReturn call (throw, _exit, etc). // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37 for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) { Instruction *CI = NoReturnCalls[i]; IRBuilder<> IRB(CI); - IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName, - IRB.getVoidTy(), NULL)); + IRB.CreateCall(AsanHandleNoReturnFunc); } + DEBUG(dbgs() << "ASAN done instrumenting:\n" << F << "\n"); return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty(); } @@ -926,7 +1009,7 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) { // compiler hoists the load of the shadow value somewhere too high. // This causes asan to report a non-existing bug on 453.povray. // It sounds like an LLVM bug. -bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { +bool AddressSanitizer::poisonStackInFunction(Function &F) { if (!ClStack) return false; SmallVector<AllocaInst*, 16> AllocaVec; SmallVector<Instruction*, 8> RetVec; @@ -976,8 +1059,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { Value *LocalStackBase = OrigStackBase; if (DoStackMalloc) { - Value *AsanStackMallocFunc = M.getOrInsertFunction( - kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL); LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc, ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); } @@ -1012,22 +1093,16 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { Value *BasePlus1 = IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, LongSize/8)); BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); - Value *Description = IRB.CreatePointerCast( - createPrivateGlobalForString(M, StackDescription.str()), - IntptrTy); + GlobalVariable *StackDescriptionGlobal = + createPrivateGlobalForString(*F.getParent(), StackDescription.str()); + GlobalsCreatedByAsan.insert(StackDescriptionGlobal); + Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); // Poison the stack redzones at the entry. Value *ShadowBase = memToShadow(LocalStackBase, IRB); PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true); - Value *AsanStackFreeFunc = NULL; - if (DoStackMalloc) { - AsanStackFreeFunc = M.getOrInsertFunction( - kAsanStackFreeName, IRB.getVoidTy(), - IntptrTy, IntptrTy, IntptrTy, NULL); - } - // Unpoison the stack before all ret instructions. for (size_t i = 0, n = RetVec.size(); i < n; i++) { Instruction *Ret = RetVec[i]; @@ -1046,6 +1121,10 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { } } + // We are done. Remove the old unused alloca instructions. + for (size_t i = 0, n = AllocaVec.size(); i < n; i++) + AllocaVec[i]->eraseFromParent(); + if (ClDebugStack) { DEBUG(dbgs() << F); } diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp new file mode 100644 index 0000000..ef34b8a --- /dev/null +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -0,0 +1,105 @@ +//===-- BlackList.cpp - blacklist for sanitizers --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables based on a user-supplied blacklist. +// +//===----------------------------------------------------------------------===// + +#include <utility> +#include <string> + +#include "BlackList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + +namespace llvm { + +BlackList::BlackList(const StringRef Path) { + // Validate and open blacklist file. + if (!Path.size()) return; + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path, File)) { + report_fatal_error("Can't open blacklist file: " + Path + ": " + + EC.message()); + } + + // Iterate through each line in the blacklist file. + SmallVector<StringRef, 16> Lines; + SplitString(File.take()->getBuffer(), Lines, "\n\r"); + StringMap<std::string> Regexps; + for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end(); + I != E; ++I) { + // Ignore empty lines and lines starting with "#" + if (I->empty() || I->startswith("#")) + continue; + // Get our prefix and unparsed regexp. + std::pair<StringRef, StringRef> SplitLine = I->split(":"); + StringRef Prefix = SplitLine.first; + std::string Regexp = SplitLine.second; + + // Replace * with .* + for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; + pos += strlen(".*")) { + Regexp.replace(pos, strlen("*"), ".*"); + } + + // Check that the regexp is valid. + Regex CheckRE(Regexp); + std::string Error; + if (!CheckRE.isValid(Error)) { + report_fatal_error("malformed blacklist regex: " + SplitLine.second + + ": " + Error); + } + + // Add this regexp into the proper group by its prefix. + if (Regexps[Prefix].size()) + Regexps[Prefix] += "|"; + Regexps[Prefix] += Regexp; + } + + // Iterate through each of the prefixes, and create Regexs for them. + for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end(); + I != E; ++I) { + Entries[I->getKey()] = new Regex(I->getValue()); + } +} + +bool BlackList::isIn(const Function &F) { + return isIn(*F.getParent()) || inSection("fun", F.getName()); +} + +bool BlackList::isIn(const GlobalVariable &G) { + return isIn(*G.getParent()) || inSection("global", G.getName()); +} + +bool BlackList::isIn(const Module &M) { + return inSection("src", M.getModuleIdentifier()); +} + +bool BlackList::isInInit(const GlobalVariable &G) { + return isIn(*G.getParent()) || inSection("global-init", G.getName()); +} + +bool BlackList::inSection(const StringRef Section, + const StringRef Query) { + Regex *FunctionRegex = Entries[Section]; + return FunctionRegex ? FunctionRegex->match(Query) : false; +} + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h new file mode 100644 index 0000000..f3c05a5 --- /dev/null +++ b/lib/Transforms/Instrumentation/BlackList.h @@ -0,0 +1,57 @@ +//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables based on a user-supplied blacklist. +// +// The blacklist disables instrumentation of various functions and global +// variables. Each line contains a prefix, followed by a wild card expression. +// Empty lines and lines starting with "#" are ignored. +// --- +// # Blacklisted items: +// fun:*_ZN4base6subtle* +// global:*global_with_bad_access_or_initialization* +// global-init:*global_with_initialization_issues* +// src:file_with_tricky_code.cc +// --- +// Note that the wild card is in fact an llvm::Regex, but * is automatically +// replaced with .* +// This is similar to the "ignore" feature of ThreadSanitizer. +// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores +// +//===----------------------------------------------------------------------===// +// + +#include "llvm/ADT/StringMap.h" + +namespace llvm { +class Function; +class GlobalVariable; +class Module; +class Regex; +class StringRef; + +class BlackList { + public: + BlackList(const StringRef Path); + // Returns whether either this function or it's source file are blacklisted. + bool isIn(const Function &F); + // Returns whether either this global or it's source file are blacklisted. + bool isIn(const GlobalVariable &G); + // Returns whether this module is blacklisted by filename. + bool isIn(const Module &M); + // Returns whether a global should be excluded from initialization checking. + bool isInInit(const GlobalVariable &G); + private: + StringMap<Regex*> Entries; + + bool inSection(const StringRef Section, const StringRef Query); +}; + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 09e0f14..7810b1b 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -23,7 +23,8 @@ #include "llvm/Support/InstIterator.h" #include "llvm/Support/TargetFolder.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Instrumentation.h" using namespace llvm; @@ -47,11 +48,13 @@ namespace { virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); + AU.addRequired<DataLayout>(); + AU.addRequired<TargetLibraryInfo>(); } private: - const TargetData *TD; + const DataLayout *TD; + const TargetLibraryInfo *TLI; ObjectSizeOffsetEvaluator *ObjSizeEval; BuilderTy *Builder; Instruction *Inst; @@ -140,7 +143,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); - IntegerType *IntTy = TD->getIntPtrType(Inst->getContext()); + Type *IntTy = TD->getIntPtrType(Ptr->getType()); Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); // three checks are required to ensure safety: @@ -165,12 +168,13 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { } bool BoundsChecking::runOnFunction(Function &F) { - TD = &getAnalysis<TargetData>(); + TD = &getAnalysis<DataLayout>(); + TLI = &getAnalysis<TargetLibraryInfo>(); TrapBB = 0; BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); Builder = &TheBuilder; - ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext()); + ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext()); ObjSizeEval = &TheObjSizeEval; // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 00de882..058f68c 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,8 +1,8 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp + BlackList.cpp BoundsChecking.cpp EdgeProfiling.cpp - FunctionBlackList.cpp GCOVProfiling.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp deleted file mode 100644 index 188ea4d..0000000 --- a/lib/Transforms/Instrumentation/FunctionBlackList.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//===-- FunctionBlackList.cpp - blacklist of functions --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions based on -// user-supplied blacklist. -// -//===----------------------------------------------------------------------===// - -#include "FunctionBlackList.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Function.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" - -namespace llvm { - -FunctionBlackList::FunctionBlackList(const std::string &Path) { - Functions = NULL; - const char *kFunPrefix = "fun:"; - if (!Path.size()) return; - std::string Fun; - - OwningPtr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) { - report_fatal_error("Can't open blacklist file " + Path + ": " + - EC.message()); - } - MemoryBuffer *Buff = File.take(); - const char *Data = Buff->getBufferStart(); - size_t DataLen = Buff->getBufferSize(); - SmallVector<StringRef, 16> Lines; - SplitString(StringRef(Data, DataLen), Lines, "\n\r"); - for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { - if (Lines[i].startswith(kFunPrefix)) { - std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); - std::string ThisFuncRE; - // add ThisFunc replacing * with .* - for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { - if (ThisFunc[j] == '*') - ThisFuncRE += '.'; - ThisFuncRE += ThisFunc[j]; - } - // Check that the regexp is valid. - Regex CheckRE(ThisFuncRE); - std::string Error; - if (!CheckRE.isValid(Error)) - report_fatal_error("malformed blacklist regex: " + ThisFunc + - ": " + Error); - // Append to the final regexp. - if (Fun.size()) - Fun += "|"; - Fun += ThisFuncRE; - } - } - if (Fun.size()) { - Functions = new Regex(Fun); - } -} - -bool FunctionBlackList::isIn(const Function &F) { - if (Functions) { - bool Res = Functions->match(F.getName()); - return Res; - } - return false; -} - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h deleted file mode 100644 index c1239b9..0000000 --- a/lib/Transforms/Instrumentation/FunctionBlackList.h +++ /dev/null @@ -1,37 +0,0 @@ -//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions based on -// user-supplied blacklist. -// -//===----------------------------------------------------------------------===// -// - -#include <string> - -namespace llvm { -class Function; -class Regex; - -// Blacklisted functions are not instrumented. -// The blacklist file contains one or more lines like this: -// --- -// fun:FunctionWildCard -// --- -// This is similar to the "ignore" feature of ThreadSanitizer. -// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores -class FunctionBlackList { - public: - FunctionBlackList(const std::string &Path); - bool isIn(const Function &F); - private: - Regex *Functions; -}; - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 264a6a6..e9192e5 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -88,11 +88,11 @@ namespace { // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *, - MDNode *>, 8> &); + void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); + void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); - std::string mangleName(DICompileUnit CU, std::string NewStem); + std::string mangleName(DICompileUnit CU, const char *NewStem); bool EmitNotes; bool EmitData; @@ -329,7 +329,7 @@ namespace { }; } -std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) { +std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) { for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) { MDNode *N = GCov->getOperand(i); @@ -519,6 +519,7 @@ bool GCOVProfiler::emitProfileArcs() { } insertCounterWriteout(CountersBySP); + insertFlush(CountersBySP); } if (InsertIndCounterIncrCode) @@ -630,14 +631,15 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { } void GCOVProfiler::insertCounterWriteout( - SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) { - FunctionType *WriteoutFTy = - FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *WriteoutF = Function::Create(WriteoutFTy, - GlobalValue::InternalLinkage, - "__llvm_gcov_writeout", M); + ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { + FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); + if (!WriteoutF) + WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage, + "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); - BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); IRBuilder<> Builder(BB); Constant *StartFile = getStartFileFunc(); @@ -648,11 +650,11 @@ void GCOVProfiler::insertCounterWriteout( NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (CU_Nodes) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit compile_unit(CU_Nodes->getOperand(i)); - std::string FilenameGcda = mangleName(compile_unit, "gcda"); + DICompileUnit CU(CU_Nodes->getOperand(i)); + std::string FilenameGcda = mangleName(CU, "gcda"); Builder.CreateCall(StartFile, Builder.CreateGlobalStringPtr(FilenameGcda)); - for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator + for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator I = CountersBySP.begin(), E = CountersBySP.end(); I != E; ++I) { DISubprogram SP(I->second); @@ -680,7 +682,7 @@ void GCOVProfiler::insertCounterWriteout( "__llvm_gcov_init", M); F->setUnnamedAddr(true); F->setLinkage(GlobalValue::InternalLinkage); - F->addFnAttr(Attribute::NoInline); + F->addFnAttr(Attributes::NoInline); BB = BasicBlock::Create(*Ctx, "entry", F); Builder.SetInsertPoint(BB); @@ -699,7 +701,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { cast<Function>(GCOVProfiler::getIncrementIndirectCounterFunc()); Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); - Fn->addFnAttr(Attribute::NoInline); + Fn->addFnAttr(Attributes::NoInline); Type *Int32Ty = Type::getInt32Ty(*Ctx); Type *Int64Ty = Type::getInt64Ty(*Ctx); @@ -745,3 +747,42 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Builder.SetInsertPoint(Exit); Builder.CreateRetVoid(); } + +void GCOVProfiler:: +insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *FlushF = M->getFunction("__gcov_flush"); + if (!FlushF) + FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, + "__gcov_flush", M); + else + FlushF->setLinkage(GlobalValue::InternalLinkage); + FlushF->setUnnamedAddr(true); + + BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); + + // Write out the current counters. + Constant *WriteoutF = M->getFunction("__llvm_gcov_writeout"); + assert(WriteoutF && "Need to create the writeout function first!"); + + IRBuilder<> Builder(Entry); + Builder.CreateCall(WriteoutF); + + // Zero out the counters. + for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator + I = CountersBySP.begin(), E = CountersBySP.end(); + I != E; ++I) { + GlobalVariable *GV = I->first; + Constant *Null = Constant::getNullValue(GV->getType()->getElementType()); + Builder.CreateStore(Null, GV); + } + + Type *RetTy = FlushF->getReturnType(); + if (RetTy == Type::getVoidTy(*Ctx)) + Builder.CreateRetVoid(); + else if (RetTy->isIntegerTy()) + // Used if __gcov_flush was implicitly declared. + Builder.CreateRet(ConstantInt::get(RetTy, 0)); + else + report_fatal_error("invalid return type for __gcov_flush"); +} diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index f76c77e..a4bb5a6 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -26,30 +26,6 @@ namespace llvm { /// The type parameter T determines the type of the nodes of the graph. template <typename T> class MaximumSpanningTree { - - // A comparing class for comparing weighted edges. - template <typename CT> - struct EdgeWeightCompare { - bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X, - typename MaximumSpanningTree<CT>::EdgeWeight Y) const { - if (X.second > Y.second) return true; - if (X.second < Y.second) return false; - if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) { - if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) { - if (BBX->size() > BBY->size()) return true; - if (BBX->size() < BBY->size()) return false; - } - } - if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) { - if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) { - if (BBX->size() > BBY->size()) return true; - if (BBX->size() < BBY->size()) return false; - } - } - return false; - } - }; - public: typedef std::pair<const T*, const T*> Edge; typedef std::pair<Edge, double> EdgeWeight; @@ -59,6 +35,33 @@ namespace llvm { MaxSpanTree MST; + private: + // A comparing class for comparing weighted edges. + struct EdgeWeightCompare { + static bool getBlockSize(const T *X) { + const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X); + return BB ? BB->size() : 0; + } + + bool operator()(EdgeWeight X, EdgeWeight Y) const { + if (X.second > Y.second) return true; + if (X.second < Y.second) return false; + + // Equal edge weights: break ties by comparing block sizes. + size_t XSizeA = getBlockSize(X.first.first); + size_t YSizeA = getBlockSize(Y.first.first); + if (XSizeA > YSizeA) return true; + if (XSizeA < YSizeA) return false; + + size_t XSizeB = getBlockSize(X.first.second); + size_t YSizeB = getBlockSize(Y.first.second); + if (XSizeB > YSizeB) return true; + if (XSizeB < YSizeB) return false; + + return false; + } + }; + public: static char ID; // Class identification, replacement for typeinfo @@ -66,7 +69,7 @@ namespace llvm { /// spanning tree. MaximumSpanningTree(EdgeWeights &EdgeVector) { - std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>()); + std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare()); // Create spanning tree, Forest contains a special data structure // that makes checking if two nodes are already in a common (sub-)tree diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index dc0fa71..9e10fc4 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -21,7 +21,7 @@ #define DEBUG_TYPE "tsan" -#include "FunctionBlackList.h" +#include "BlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" @@ -38,7 +38,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -47,10 +47,19 @@ using namespace llvm; static cl::opt<std::string> ClBlackListFile("tsan-blacklist", cl::desc("Blacklist file"), cl::Hidden); +static cl::opt<bool> ClInstrumentMemoryAccesses( + "tsan-instrument-memory-accesses", cl::init(true), + cl::desc("Instrument memory accesses"), cl::Hidden); +static cl::opt<bool> ClInstrumentFuncEntryExit( + "tsan-instrument-func-entry-exit", cl::init(true), + cl::desc("Instrument function entry and exit"), cl::Hidden); +static cl::opt<bool> ClInstrumentAtomics( + "tsan-instrument-atomics", cl::init(true), + cl::desc("Instrument atomics"), cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); -STATISTIC(NumOmittedReadsBeforeWrite, +STATISTIC(NumOmittedReadsBeforeWrite, "Number of reads ignored due to following writes"); STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); @@ -76,8 +85,8 @@ struct ThreadSanitizer : public FunctionPass { bool addrPointsToConstantData(Value *Addr); int getMemoryAccessFuncIndex(Value *Addr); - TargetData *TD; - OwningPtr<FunctionBlackList> BL; + DataLayout *TD; + OwningPtr<BlackList> BL; IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. Function *TsanFuncEntry; @@ -88,6 +97,10 @@ struct ThreadSanitizer : public FunctionPass { Function *TsanWrite[kNumberOfAccessSizes]; Function *TsanAtomicLoad[kNumberOfAccessSizes]; Function *TsanAtomicStore[kNumberOfAccessSizes]; + Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes]; + Function *TsanAtomicCAS[kNumberOfAccessSizes]; + Function *TsanAtomicThreadFence; + Function *TsanAtomicSignalFence; Function *TsanVptrUpdate; }; } // namespace @@ -118,10 +131,10 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { } bool ThreadSanitizer::doInitialization(Module &M) { - TD = getAnalysisIfAvailable<TargetData>(); + TD = getAnalysisIfAvailable<DataLayout>(); if (!TD) return false; - BL.reset(new FunctionBlackList(ClBlackListFile)); + BL.reset(new BlackList(ClBlackListFile)); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -158,10 +171,42 @@ bool ThreadSanitizer::doInitialization(Module &M) { TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, NULL)); + + for (int op = AtomicRMWInst::FIRST_BINOP; + op <= AtomicRMWInst::LAST_BINOP; ++op) { + TsanAtomicRMW[op][i] = NULL; + const char *NamePart = NULL; + if (op == AtomicRMWInst::Xchg) + NamePart = "_exchange"; + else if (op == AtomicRMWInst::Add) + NamePart = "_fetch_add"; + else if (op == AtomicRMWInst::Sub) + NamePart = "_fetch_sub"; + else if (op == AtomicRMWInst::And) + NamePart = "_fetch_and"; + else if (op == AtomicRMWInst::Or) + NamePart = "_fetch_or"; + else if (op == AtomicRMWInst::Xor) + NamePart = "_fetch_xor"; + else + continue; + SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); + TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction( + RMWName, Ty, PtrTy, Ty, OrdTy, NULL)); + } + + SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + + "_compare_exchange_val"); + TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction( + AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, NULL)); } TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), NULL)); + TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL)); + TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL)); return true; } @@ -186,7 +231,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { NumOmittedReadsFromConstantGlobals++; return true; } - } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) { + } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) { if (isVtableAccess(L)) { // Reads from a vtable pointer can not race with any writes. NumOmittedReadsFromVtable++; @@ -244,8 +289,8 @@ static bool isAtomic(Instruction *I) { return true; if (isa<AtomicCmpXchgInst>(I)) return true; - if (FenceInst *FI = dyn_cast<FenceInst>(I)) - return FI->getSynchScope() == CrossThread; + if (isa<FenceInst>(I)) + return true; return false; } @@ -284,17 +329,19 @@ bool ThreadSanitizer::runOnFunction(Function &F) { // (e.g. variables that do not escape, etc). // Instrument memory accesses. - for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { - Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); - } + if (ClInstrumentMemoryAccesses) + for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { + Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); + } // Instrument atomic memory accesses. - for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) { - Res |= instrumentAtomic(AtomicAccesses[i]); - } + if (ClInstrumentAtomics) + for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) { + Res |= instrumentAtomic(AtomicAccesses[i]); + } // Instrument function entry/exit points if there were instrumented accesses. - if (Res || HasCalls) { + if ((Res || HasCalls) && ClInstrumentFuncEntryExit) { IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); Value *ReturnAddress = IRB.CreateCall( Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), @@ -343,12 +390,12 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { switch (ord) { case NotAtomic: assert(false); case Unordered: // Fall-through. - case Monotonic: v = 1 << 0; break; - // case Consume: v = 1 << 1; break; // Not specified yet. - case Acquire: v = 1 << 2; break; - case Release: v = 1 << 3; break; - case AcquireRelease: v = 1 << 4; break; - case SequentiallyConsistent: v = 1 << 5; break; + case Monotonic: v = 0; break; + // case Consume: v = 1; break; // Not specified yet. + case Acquire: v = 2; break; + case Release: v = 3; break; + case AcquireRelease: v = 4; break; + case SequentiallyConsistent: v = 5; break; } return IRB->getInt32(v); } @@ -385,12 +432,44 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { CallInst *C = CallInst::Create(TsanAtomicStore[Idx], ArrayRef<Value*>(Args)); ReplaceInstWithInst(I, C); - } else if (isa<AtomicRMWInst>(I)) { - // FIXME: Not yet supported. - } else if (isa<AtomicCmpXchgInst>(I)) { - // FIXME: Not yet supported. - } else if (isa<FenceInst>(I)) { - // FIXME: Not yet supported. + } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) { + Value *Addr = RMWI->getPointerOperand(); + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) + return false; + Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; + if (F == NULL) + return false; + const size_t ByteSize = 1 << Idx; + const size_t BitSize = ByteSize * 8; + Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), + IRB.CreateIntCast(RMWI->getValOperand(), Ty, false), + createOrdering(&IRB, RMWI->getOrdering())}; + CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); + } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) { + Value *Addr = CASI->getPointerOperand(); + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) + return false; + const size_t ByteSize = 1 << Idx; + const size_t BitSize = ByteSize * 8; + Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), + IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false), + IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false), + createOrdering(&IRB, CASI->getOrdering())}; + CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); + } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) { + Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; + Function *F = FI->getSynchScope() == SingleThread ? + TsanAtomicSignalFence : TsanAtomicThreadFence; + CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); } return true; } |