Diffstat (limited to 'contrib/llvm/lib/Transforms/Instrumentation')
8 files changed, 1445 insertions, 77 deletions
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp new file mode 100644 index 0000000..b43b9e5 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -0,0 +1,937 @@ +//===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// Details of the algorithm: +// http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asan" + +#include "FunctionBlackList.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Type.h" + +#include <string> +#include <algorithm> + +using namespace llvm; + +static const uint64_t kDefaultShadowScale = 3; +static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; +static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; + +static const size_t kMaxStackMallocSize = 1 << 16; // 64K +static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; +static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; + +static const char *kAsanModuleCtorName = "asan.module_ctor"; +static const char *kAsanModuleDtorName = "asan.module_dtor"; +static const int kAsanCtorAndCtorPriority = 1; +static const char *kAsanReportErrorTemplate = "__asan_report_"; +static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; +static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; +static const char *kAsanInitName = "__asan_init"; +static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; +static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; +static const char *kAsanMappingScaleName = "__asan_mapping_scale"; +static const char *kAsanStackMallocName = "__asan_stack_malloc"; +static const char *kAsanStackFreeName = "__asan_stack_free"; + +static const int kAsanStackLeftRedzoneMagic = 0xf1; +static const int kAsanStackMidRedzoneMagic = 0xf2; +static const int kAsanStackRightRedzoneMagic = 0xf3; +static const int kAsanStackPartialRedzoneMagic = 0xf4; + +// Command-line flags. + +// This flag may need to be replaced with -f[no-]asan-reads. 
+static cl::opt<bool> ClInstrumentReads("asan-instrument-reads", + cl::desc("instrument read instructions"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes", + cl::desc("instrument write instructions"), cl::Hidden, cl::init(true)); +// This flag may need to be replaced with -f[no]asan-stack. +static cl::opt<bool> ClStack("asan-stack", + cl::desc("Handle stack memory"), cl::Hidden, cl::init(true)); +// This flag may need to be replaced with -f[no]asan-use-after-return. +static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", + cl::desc("Check return-after-free"), cl::Hidden, cl::init(false)); +// This flag may need to be replaced with -f[no]asan-globals. +static cl::opt<bool> ClGlobals("asan-globals", + cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClMemIntrin("asan-memintrin", + cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); +// This flag may need to be replaced with -fasan-blacklist. +static cl::opt<std::string> ClBlackListFile("asan-blacklist", + cl::desc("File containing the list of functions to ignore " + "during instrumentation"), cl::Hidden); + +// These flags allow to change the shadow mapping. +// The shadow mapping looks like +// Shadow = (Mem >> scale) + (1 << offset_log) +static cl::opt<int> ClMappingScale("asan-mapping-scale", + cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0)); +static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log", + cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1)); + +// Optimization flags. Not user visible, used mostly for testing +// and benchmarking the tool. +static cl::opt<bool> ClOpt("asan-opt", + cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp", + cl::desc("Instrument the same temp just once"), cl::Hidden, + cl::init(true)); +static cl::opt<bool> ClOptGlobals("asan-opt-globals", + cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true)); + +// Debug flags. +static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, + cl::init(0)); +static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"), + cl::Hidden, cl::init(0)); +static cl::opt<std::string> ClDebugFunc("asan-debug-func", + cl::Hidden, cl::desc("Debug func")); +static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"), + cl::Hidden, cl::init(-1)); +static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), + cl::Hidden, cl::init(-1)); + +namespace { + +/// AddressSanitizer: instrument the code in module to find memory bugs. 
+struct AddressSanitizer : public ModulePass { + AddressSanitizer(); + virtual const char *getPassName() const; + void instrumentMop(Instruction *I); + void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, + Value *Addr, uint32_t TypeSize, bool IsWrite); + Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr, + bool IsWrite, uint32_t TypeSize); + bool instrumentMemIntrinsic(MemIntrinsic *MI); + void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, + Value *Size, + Instruction *InsertBefore, bool IsWrite); + Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); + bool handleFunction(Module &M, Function &F); + bool maybeInsertAsanInitAtFunctionEntry(Function &F); + bool poisonStackInFunction(Module &M, Function &F); + virtual bool runOnModule(Module &M); + bool insertGlobalRedzones(Module &M); + BranchInst *splitBlockAndInsertIfThen(Instruction *SplitBefore, Value *Cmp); + static char ID; // Pass identification, replacement for typeid + + private: + + uint64_t getAllocaSizeInBytes(AllocaInst *AI) { + Type *Ty = AI->getAllocatedType(); + uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); + return SizeInBytes; + } + uint64_t getAlignedSize(uint64_t SizeInBytes) { + return ((SizeInBytes + RedzoneSize - 1) + / RedzoneSize) * RedzoneSize; + } + uint64_t getAlignedAllocaSize(AllocaInst *AI) { + uint64_t SizeInBytes = getAllocaSizeInBytes(AI); + return getAlignedSize(SizeInBytes); + } + + void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, + Value *ShadowBase, bool DoPoison); + bool LooksLikeCodeInBug11395(Instruction *I); + + Module *CurrentModule; + LLVMContext *C; + TargetData *TD; + uint64_t MappingOffset; + int MappingScale; + size_t RedzoneSize; + int LongSize; + Type *IntptrTy; + Type *IntptrPtrTy; + Function *AsanCtorFunction; + Function *AsanInitFunction; + Instruction *CtorInsertBefore; + OwningPtr<FunctionBlackList> BL; +}; +} // namespace + +char AddressSanitizer::ID = 0; +INITIALIZE_PASS(AddressSanitizer, "asan", + "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", + false, false) +AddressSanitizer::AddressSanitizer() : ModulePass(ID) { } +ModulePass *llvm::createAddressSanitizerPass() { + return new AddressSanitizer(); +} + +const char *AddressSanitizer::getPassName() const { + return "AddressSanitizer"; +} + +// Create a constant for Str so that we can pass it to the run-time lib. +static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { + Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); + return new GlobalVariable(M, StrConst->getType(), true, + GlobalValue::PrivateLinkage, StrConst, ""); +} + +// Split the basic block and insert an if-then code. +// Before: +// Head +// SplitBefore +// Tail +// After: +// Head +// if (Cmp) +// NewBasicBlock +// SplitBefore +// Tail +// +// Returns the NewBasicBlock's terminator. 
+BranchInst *AddressSanitizer::splitBlockAndInsertIfThen( + Instruction *SplitBefore, Value *Cmp) { + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + BasicBlock *NewBasicBlock = + BasicBlock::Create(*C, "", Head->getParent()); + BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/NewBasicBlock, + /*ifFalse*/Tail, + Cmp); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + + BranchInst *CheckTerm = BranchInst::Create(Tail, NewBasicBlock); + return CheckTerm; +} + +Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { + // Shadow >> scale + Shadow = IRB.CreateLShr(Shadow, MappingScale); + if (MappingOffset == 0) + return Shadow; + // (Shadow >> scale) | offset + return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, + MappingOffset)); +} + +void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns, + Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) { + // Check the first byte. + { + IRBuilder<> IRB(InsertBefore); + instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite); + } + // Check the last byte. + { + IRBuilder<> IRB(InsertBefore); + Value *SizeMinusOne = IRB.CreateSub( + Size, ConstantInt::get(Size->getType(), 1)); + SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false); + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne); + instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); + } +} + +// Instrument memset/memmove/memcpy +bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { + Value *Dst = MI->getDest(); + MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI); + Value *Src = MemTran ? MemTran->getSource() : NULL; + Value *Length = MI->getLength(); + + Constant *ConstLength = dyn_cast<Constant>(Length); + Instruction *InsertBefore = MI; + if (ConstLength) { + if (ConstLength->isNullValue()) return false; + } else { + // The size is not a constant so it could be zero -- check at run-time. + IRBuilder<> IRB(InsertBefore); + + Value *Cmp = IRB.CreateICmpNE(Length, + Constant::getNullValue(Length->getType())); + InsertBefore = splitBlockAndInsertIfThen(InsertBefore, Cmp); + } + + instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true); + if (Src) + instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false); + return true; +} + +static Value *getLDSTOperand(Instruction *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + return LI->getPointerOperand(); + } + return cast<StoreInst>(*I).getPointerOperand(); +} + +void AddressSanitizer::instrumentMop(Instruction *I) { + int IsWrite = isa<StoreInst>(*I); + Value *Addr = getLDSTOperand(I); + if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) { + // We are accessing a global scalar variable. Nothing to catch here. + return; + } + Type *OrigPtrTy = Addr->getType(); + Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); + + assert(OrigTy->isSized()); + uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); + + if (TypeSize != 8 && TypeSize != 16 && + TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { + // Ignore all unusual sizes. + return; + } + + IRBuilder<> IRB(I); + instrumentAddress(I, IRB, Addr, TypeSize, IsWrite); +} + +Instruction *AddressSanitizer::generateCrashCode( + IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) { + // IsWrite and TypeSize are encoded in the function name. 
+ std::string FunctionName = std::string(kAsanReportErrorTemplate) + + (IsWrite ? "store" : "load") + itostr(TypeSize / 8); + Value *ReportWarningFunc = CurrentModule->getOrInsertFunction( + FunctionName, IRB.getVoidTy(), IntptrTy, NULL); + CallInst *Call = IRB.CreateCall(ReportWarningFunc, Addr); + Call->setDoesNotReturn(); + return Call; +} + +void AddressSanitizer::instrumentAddress(Instruction *OrigIns, + IRBuilder<> &IRB, Value *Addr, + uint32_t TypeSize, bool IsWrite) { + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + + Type *ShadowTy = IntegerType::get( + *C, std::max(8U, TypeSize >> MappingScale)); + Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); + Value *ShadowPtr = memToShadow(AddrLong, IRB); + Value *CmpVal = Constant::getNullValue(ShadowTy); + Value *ShadowValue = IRB.CreateLoad( + IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); + + Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); + + Instruction *CheckTerm = splitBlockAndInsertIfThen( + cast<Instruction>(Cmp)->getNextNode(), Cmp); + IRBuilder<> IRB2(CheckTerm); + + size_t Granularity = 1 << MappingScale; + if (TypeSize < 8 * Granularity) { + // Addr & (Granularity - 1) + Value *Lower3Bits = IRB2.CreateAnd( + AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); + // (Addr & (Granularity - 1)) + size - 1 + Value *LastAccessedByte = IRB2.CreateAdd( + Lower3Bits, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); + // (uint8_t) ((Addr & (Granularity-1)) + size - 1) + LastAccessedByte = IRB2.CreateIntCast( + LastAccessedByte, IRB.getInt8Ty(), false); + // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue + Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue); + + CheckTerm = splitBlockAndInsertIfThen(CheckTerm, Cmp2); + } + + IRBuilder<> IRB1(CheckTerm); + Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize); + Crash->setDebugLoc(OrigIns->getDebugLoc()); + ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C)); +} + +// This function replaces all global variables with new variables that have +// trailing redzones. It also creates a function that poisons +// redzones and inserts this function into llvm.global_ctors. +bool AddressSanitizer::insertGlobalRedzones(Module &M) { + SmallVector<GlobalVariable *, 16> GlobalsToChange; + + for (Module::GlobalListType::iterator G = M.getGlobalList().begin(), + E = M.getGlobalList().end(); G != E; ++G) { + Type *Ty = cast<PointerType>(G->getType())->getElementType(); + DEBUG(dbgs() << "GLOBAL: " << *G); + + if (!Ty->isSized()) continue; + if (!G->hasInitializer()) continue; + // Touch only those globals that will not be defined in other modules. + // Don't handle ODR type linkages since other modules may be built w/o asan. + if (G->getLinkage() != GlobalVariable::ExternalLinkage && + G->getLinkage() != GlobalVariable::PrivateLinkage && + G->getLinkage() != GlobalVariable::InternalLinkage) + continue; + // Two problems with thread-locals: + // - The address of the main thread's copy can't be computed at link-time. + // - Need to poison all copies, not just the main thread's one. + if (G->isThreadLocal()) + continue; + // For now, just ignore this Alloca if the alignment is large. + if (G->getAlignment() > RedzoneSize) continue; + + // Ignore all the globals with the names starting with "\01L_OBJC_". + // Many of those are put into the .cstring section. The linker compresses + // that section by removing the spare \0s after the string terminator, so + // our redzones get broken. 
+ if ((G->getName().find("\01L_OBJC_") == 0) || + (G->getName().find("\01l_OBJC_") == 0)) { + DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); + continue; + } + + if (G->hasSection()) { + StringRef Section(G->getSection()); + // Ignore the globals from the __OBJC section. The ObjC runtime assumes + // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to + // them. + if ((Section.find("__OBJC,") == 0) || + (Section.find("__DATA, __objc_") == 0)) { + DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); + continue; + } + // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 + // Constant CFString instances are compiled in the following way: + // -- the string buffer is emitted into + // __TEXT,__cstring,cstring_literals + // -- the constant NSConstantString structure referencing that buffer + // is placed into __DATA,__cfstring + // Therefore there's no point in placing redzones into __DATA,__cfstring. + // Moreover, it causes the linker to crash on OS X 10.7 + if (Section.find("__DATA,__cfstring") == 0) { + DEBUG(dbgs() << "Ignoring CFString: " << *G); + continue; + } + } + + GlobalsToChange.push_back(G); + } + + size_t n = GlobalsToChange.size(); + if (n == 0) return false; + + // A global is described by a structure + // size_t beg; + // size_t size; + // size_t size_with_redzone; + // const char *name; + // We initialize an array of such structures and pass it to a run-time call. + StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, + IntptrTy, IntptrTy, NULL); + SmallVector<Constant *, 16> Initializers(n); + + IRBuilder<> IRB(CtorInsertBefore); + + for (size_t i = 0; i < n; i++) { + GlobalVariable *G = GlobalsToChange[i]; + PointerType *PtrTy = cast<PointerType>(G->getType()); + Type *Ty = PtrTy->getElementType(); + uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); + uint64_t RightRedzoneSize = RedzoneSize + + (RedzoneSize - (SizeInBytes % RedzoneSize)); + Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); + + StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); + Constant *NewInitializer = ConstantStruct::get( + NewTy, G->getInitializer(), + Constant::getNullValue(RightRedZoneTy), NULL); + + SmallString<2048> DescriptionOfGlobal = G->getName(); + DescriptionOfGlobal += " ("; + DescriptionOfGlobal += M.getModuleIdentifier(); + DescriptionOfGlobal += ")"; + GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); + + // Create a new global variable with enough space for a redzone. 
+ GlobalVariable *NewGlobal = new GlobalVariable( + M, NewTy, G->isConstant(), G->getLinkage(), + NewInitializer, "", G, G->isThreadLocal()); + NewGlobal->copyAttributesFrom(G); + NewGlobal->setAlignment(RedzoneSize); + + Value *Indices2[2]; + Indices2[0] = IRB.getInt32(0); + Indices2[1] = IRB.getInt32(0); + + G->replaceAllUsesWith( + ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true)); + NewGlobal->takeName(G); + G->eraseFromParent(); + + Initializers[i] = ConstantStruct::get( + GlobalStructTy, + ConstantExpr::getPointerCast(NewGlobal, IntptrTy), + ConstantInt::get(IntptrTy, SizeInBytes), + ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), + ConstantExpr::getPointerCast(Name, IntptrTy), + NULL); + DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal); + } + + ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n); + GlobalVariable *AllGlobals = new GlobalVariable( + M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage, + ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); + + Function *AsanRegisterGlobals = cast<Function>(M.getOrInsertFunction( + kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); + + IRB.CreateCall2(AsanRegisterGlobals, + IRB.CreatePointerCast(AllGlobals, IntptrTy), + ConstantInt::get(IntptrTy, n)); + + // We also need to unregister globals at the end, e.g. when a shared library + // gets closed. + Function *AsanDtorFunction = Function::Create( + FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); + BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); + IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB)); + Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction( + kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); + + IRB_Dtor.CreateCall2(AsanUnregisterGlobals, + IRB.CreatePointerCast(AllGlobals, IntptrTy), + ConstantInt::get(IntptrTy, n)); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority); + + DEBUG(dbgs() << M); + return true; +} + +// virtual +bool AddressSanitizer::runOnModule(Module &M) { + // Initialize the private fields. No one has accessed them before. + TD = getAnalysisIfAvailable<TargetData>(); + if (!TD) + return false; + BL.reset(new FunctionBlackList(ClBlackListFile)); + + CurrentModule = &M; + C = &(M.getContext()); + LongSize = TD->getPointerSizeInBits(); + IntptrTy = Type::getIntNTy(*C, LongSize); + IntptrPtrTy = PointerType::get(IntptrTy, 0); + + AsanCtorFunction = Function::Create( + FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); + BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); + CtorInsertBefore = ReturnInst::Create(*C, AsanCtorBB); + + // call __asan_init in the module ctor. + IRBuilder<> IRB(CtorInsertBefore); + AsanInitFunction = cast<Function>( + M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL)); + AsanInitFunction->setLinkage(Function::ExternalLinkage); + IRB.CreateCall(AsanInitFunction); + + MappingOffset = LongSize == 32 + ? 
kDefaultShadowOffset32 : kDefaultShadowOffset64; + if (ClMappingOffsetLog >= 0) { + if (ClMappingOffsetLog == 0) { + // special case + MappingOffset = 0; + } else { + MappingOffset = 1ULL << ClMappingOffsetLog; + } + } + MappingScale = kDefaultShadowScale; + if (ClMappingScale) { + MappingScale = ClMappingScale; + } + // Redzone used for stack and globals is at least 32 bytes. + // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. + RedzoneSize = std::max(32, (int)(1 << MappingScale)); + + bool Res = false; + + if (ClGlobals) + Res |= insertGlobalRedzones(M); + + if (ClMappingOffsetLog >= 0) { + // Tell the run-time the current values of mapping offset and scale. + GlobalValue *asan_mapping_offset = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, MappingOffset), + kAsanMappingOffsetName); + // Read the global, otherwise it may be optimized away. + IRB.CreateLoad(asan_mapping_offset, true); + } + if (ClMappingScale) { + GlobalValue *asan_mapping_scale = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, MappingScale), + kAsanMappingScaleName); + // Read the global, otherwise it may be optimized away. + IRB.CreateLoad(asan_mapping_scale, true); + } + + + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + Res |= handleFunction(M, *F); + } + + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); + + return Res; +} + +bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { + // For each NSObject descendant having a +load method, this method is invoked + // by the ObjC runtime before any of the static constructors is called. + // Therefore we need to instrument such methods with a call to __asan_init + // at the beginning in order to initialize our runtime before any access to + // the shadow memory. + // We cannot just ignore these methods, because they may call other + // instrumented functions. + if (F.getName().find(" load]") != std::string::npos) { + IRBuilder<> IRB(F.begin()->begin()); + IRB.CreateCall(AsanInitFunction); + return true; + } + return false; +} + +bool AddressSanitizer::handleFunction(Module &M, Function &F) { + if (BL->isIn(F)) return false; + if (&F == AsanCtorFunction) return false; + + // If needed, insert __asan_init before checking for AddressSafety attr. + maybeInsertAsanInitAtFunctionEntry(F); + + if (!F.hasFnAttr(Attribute::AddressSafety)) return false; + + if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) + return false; + // We want to instrument every address only once per basic block + // (unless there are calls between uses). + SmallSet<Value*, 16> TempsToInstrument; + SmallVector<Instruction*, 16> ToInstrument; + SmallVector<Instruction*, 8> NoReturnCalls; + + // Fill the set of memory operations to instrument. + for (Function::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) { + TempsToInstrument.clear(); + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + if (LooksLikeCodeInBug11395(BI)) return false; + if ((isa<LoadInst>(BI) && ClInstrumentReads) || + (isa<StoreInst>(BI) && ClInstrumentWrites)) { + Value *Addr = getLDSTOperand(BI); + if (ClOpt && ClOptSameTemp) { + if (!TempsToInstrument.insert(Addr)) + continue; // We've seen this temp in the current BB. + } + } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) { + // ok, take it. 
+ } else { + if (CallInst *CI = dyn_cast<CallInst>(BI)) { + // A call inside BB. + TempsToInstrument.clear(); + if (CI->doesNotReturn()) { + NoReturnCalls.push_back(CI); + } + } + continue; + } + ToInstrument.push_back(BI); + } + } + + // Instrument. + int NumInstrumented = 0; + for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { + Instruction *Inst = ToInstrument[i]; + if (ClDebugMin < 0 || ClDebugMax < 0 || + (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { + if (isa<StoreInst>(Inst) || isa<LoadInst>(Inst)) + instrumentMop(Inst); + else + instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); + } + NumInstrumented++; + } + + DEBUG(dbgs() << F); + + bool ChangedStack = poisonStackInFunction(M, F); + + // We must unpoison the stack before every NoReturn call (throw, _exit, etc). + // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37 + for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) { + Instruction *CI = NoReturnCalls[i]; + IRBuilder<> IRB(CI); + IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName, + IRB.getVoidTy(), NULL)); + } + + return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty(); +} + +static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) { + if (ShadowRedzoneSize == 1) return PoisonByte; + if (ShadowRedzoneSize == 2) return (PoisonByte << 8) + PoisonByte; + if (ShadowRedzoneSize == 4) + return (PoisonByte << 24) + (PoisonByte << 16) + + (PoisonByte << 8) + (PoisonByte); + llvm_unreachable("ShadowRedzoneSize is either 1, 2 or 4"); +} + +static void PoisonShadowPartialRightRedzone(uint8_t *Shadow, + size_t Size, + size_t RedzoneSize, + size_t ShadowGranularity, + uint8_t Magic) { + for (size_t i = 0; i < RedzoneSize; + i+= ShadowGranularity, Shadow++) { + if (i + ShadowGranularity <= Size) { + *Shadow = 0; // fully addressable + } else if (i >= Size) { + *Shadow = Magic; // unaddressable + } else { + *Shadow = Size - i; // first Size-i bytes are addressable + } + } +} + +void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, + IRBuilder<> IRB, + Value *ShadowBase, bool DoPoison) { + size_t ShadowRZSize = RedzoneSize >> MappingScale; + assert(ShadowRZSize >= 1 && ShadowRZSize <= 4); + Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8); + Type *RZPtrTy = PointerType::get(RZTy, 0); + + Value *PoisonLeft = ConstantInt::get(RZTy, + ValueForPoison(DoPoison ? kAsanStackLeftRedzoneMagic : 0LL, ShadowRZSize)); + Value *PoisonMid = ConstantInt::get(RZTy, + ValueForPoison(DoPoison ? kAsanStackMidRedzoneMagic : 0LL, ShadowRZSize)); + Value *PoisonRight = ConstantInt::get(RZTy, + ValueForPoison(DoPoison ? kAsanStackRightRedzoneMagic : 0LL, ShadowRZSize)); + + // poison the first red zone. + IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy)); + + // poison all other red zones. 
+ uint64_t Pos = RedzoneSize; + for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { + AllocaInst *AI = AllocaVec[i]; + uint64_t SizeInBytes = getAllocaSizeInBytes(AI); + uint64_t AlignedSize = getAlignedAllocaSize(AI); + assert(AlignedSize - SizeInBytes < RedzoneSize); + Value *Ptr = NULL; + + Pos += AlignedSize; + + assert(ShadowBase->getType() == IntptrTy); + if (SizeInBytes < AlignedSize) { + // Poison the partial redzone at right + Ptr = IRB.CreateAdd( + ShadowBase, ConstantInt::get(IntptrTy, + (Pos >> MappingScale) - ShadowRZSize)); + size_t AddressableBytes = RedzoneSize - (AlignedSize - SizeInBytes); + uint32_t Poison = 0; + if (DoPoison) { + PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes, + RedzoneSize, + 1ULL << MappingScale, + kAsanStackPartialRedzoneMagic); + } + Value *PartialPoison = ConstantInt::get(RZTy, Poison); + IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy)); + } + + // Poison the full redzone at right. + Ptr = IRB.CreateAdd(ShadowBase, + ConstantInt::get(IntptrTy, Pos >> MappingScale)); + Value *Poison = i == AllocaVec.size() - 1 ? PoisonRight : PoisonMid; + IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy)); + + Pos += RedzoneSize; + } +} + +// Workaround for bug 11395: we don't want to instrument stack in functions +// with large assembly blobs (32-bit only), otherwise reg alloc may crash. +// FIXME: remove once the bug 11395 is fixed. +bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) { + if (LongSize != 32) return false; + CallInst *CI = dyn_cast<CallInst>(I); + if (!CI || !CI->isInlineAsm()) return false; + if (CI->getNumArgOperands() <= 5) return false; + // We have inline assembly with quite a few arguments. + return true; +} + +// Find all static Alloca instructions and put +// poisoned red zones around all of them. +// Then unpoison everything back before the function returns. +// +// Stack poisoning does not play well with exception handling. +// When an exception is thrown, we essentially bypass the code +// that unpoisones the stack. This is why the run-time library has +// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire +// stack in the interceptor. This however does not work inside the +// actual function which catches the exception. Most likely because the +// compiler hoists the load of the shadow value somewhere too high. +// This causes asan to report a non-existing bug on 453.povray. +// It sounds like an LLVM bug. +bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { + if (!ClStack) return false; + SmallVector<AllocaInst*, 16> AllocaVec; + SmallVector<Instruction*, 8> RetVec; + uint64_t TotalSize = 0; + + // Filter out Alloca instructions we want (and can) handle. + // Collect Ret instructions. 
+ for (Function::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) { + BasicBlock &BB = *FI; + for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); + BI != BE; ++BI) { + if (isa<ReturnInst>(BI)) { + RetVec.push_back(BI); + continue; + } + + AllocaInst *AI = dyn_cast<AllocaInst>(BI); + if (!AI) continue; + if (AI->isArrayAllocation()) continue; + if (!AI->isStaticAlloca()) continue; + if (!AI->getAllocatedType()->isSized()) continue; + if (AI->getAlignment() > RedzoneSize) continue; + AllocaVec.push_back(AI); + uint64_t AlignedSize = getAlignedAllocaSize(AI); + TotalSize += AlignedSize; + } + } + + if (AllocaVec.empty()) return false; + + uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize; + + bool DoStackMalloc = ClUseAfterReturn + && LocalStackSize <= kMaxStackMallocSize; + + Instruction *InsBefore = AllocaVec[0]; + IRBuilder<> IRB(InsBefore); + + + Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize); + AllocaInst *MyAlloca = + new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore); + MyAlloca->setAlignment(RedzoneSize); + assert(MyAlloca->isStaticAlloca()); + Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy); + Value *LocalStackBase = OrigStackBase; + + if (DoStackMalloc) { + Value *AsanStackMallocFunc = M.getOrInsertFunction( + kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL); + LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc, + ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); + } + + // This string will be parsed by the run-time (DescribeStackAddress). + SmallString<2048> StackDescriptionStorage; + raw_svector_ostream StackDescription(StackDescriptionStorage); + StackDescription << F.getName() << " " << AllocaVec.size() << " "; + + uint64_t Pos = RedzoneSize; + // Replace Alloca instructions with base+offset. + for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { + AllocaInst *AI = AllocaVec[i]; + uint64_t SizeInBytes = getAllocaSizeInBytes(AI); + StringRef Name = AI->getName(); + StackDescription << Pos << " " << SizeInBytes << " " + << Name.size() << " " << Name << " "; + uint64_t AlignedSize = getAlignedAllocaSize(AI); + assert((AlignedSize % RedzoneSize) == 0); + AI->replaceAllUsesWith( + IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)), + AI->getType())); + Pos += AlignedSize + RedzoneSize; + } + assert(Pos == LocalStackSize); + + // Write the Magic value and the frame description constant to the redzone. + Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); + IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), + BasePlus0); + Value *BasePlus1 = IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, LongSize/8)); + BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); + Value *Description = IRB.CreatePointerCast( + createPrivateGlobalForString(M, StackDescription.str()), + IntptrTy); + IRB.CreateStore(Description, BasePlus1); + + // Poison the stack redzones at the entry. + Value *ShadowBase = memToShadow(LocalStackBase, IRB); + PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true); + + Value *AsanStackFreeFunc = NULL; + if (DoStackMalloc) { + AsanStackFreeFunc = M.getOrInsertFunction( + kAsanStackFreeName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy, NULL); + } + + // Unpoison the stack before all ret instructions. + for (size_t i = 0, n = RetVec.size(); i < n; i++) { + Instruction *Ret = RetVec[i]; + IRBuilder<> IRBRet(Ret); + + // Mark the current frame as retired. 
+ IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), + BasePlus0); + // Unpoison the stack. + PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false); + + if (DoStackMalloc) { + IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase, + ConstantInt::get(IntptrTy, LocalStackSize), + OrigStackBase); + } + } + + if (ClDebugStack) { + DEBUG(dbgs() << F); + } + + return true; +} diff --git a/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.cpp new file mode 100644 index 0000000..188ea4d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.cpp @@ -0,0 +1,79 @@ +//===-- FunctionBlackList.cpp - blacklist of functions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions based on +// user-supplied blacklist. +// +//===----------------------------------------------------------------------===// + +#include "FunctionBlackList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Function.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + +namespace llvm { + +FunctionBlackList::FunctionBlackList(const std::string &Path) { + Functions = NULL; + const char *kFunPrefix = "fun:"; + if (!Path.size()) return; + std::string Fun; + + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) { + report_fatal_error("Can't open blacklist file " + Path + ": " + + EC.message()); + } + MemoryBuffer *Buff = File.take(); + const char *Data = Buff->getBufferStart(); + size_t DataLen = Buff->getBufferSize(); + SmallVector<StringRef, 16> Lines; + SplitString(StringRef(Data, DataLen), Lines, "\n\r"); + for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { + if (Lines[i].startswith(kFunPrefix)) { + std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); + std::string ThisFuncRE; + // add ThisFunc replacing * with .* + for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { + if (ThisFunc[j] == '*') + ThisFuncRE += '.'; + ThisFuncRE += ThisFunc[j]; + } + // Check that the regexp is valid. + Regex CheckRE(ThisFuncRE); + std::string Error; + if (!CheckRE.isValid(Error)) + report_fatal_error("malformed blacklist regex: " + ThisFunc + + ": " + Error); + // Append to the final regexp. 
+ if (Fun.size()) + Fun += "|"; + Fun += ThisFuncRE; + } + } + if (Fun.size()) { + Functions = new Regex(Fun); + } +} + +bool FunctionBlackList::isIn(const Function &F) { + if (Functions) { + bool Res = Functions->match(F.getName()); + return Res; + } + return false; +} + +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.h b/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.h new file mode 100644 index 0000000..c1239b9 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.h @@ -0,0 +1,37 @@ +//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions based on +// user-supplied blacklist. +// +//===----------------------------------------------------------------------===// +// + +#include <string> + +namespace llvm { +class Function; +class Regex; + +// Blacklisted functions are not instrumented. +// The blacklist file contains one or more lines like this: +// --- +// fun:FunctionWildCard +// --- +// This is similar to the "ignore" feature of ThreadSanitizer. +// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores +class FunctionBlackList { + public: + FunctionBlackList(const std::string &Path); + bool isIn(const Function &F); + private: + Regex *Functions; +}; + +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index ccf7e11..96e5d5b 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -43,12 +43,14 @@ namespace { public: static char ID; GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) { + : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false), + UseExtraChecksum(false) { initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false) + GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false, + bool useExtraChecksum = false) : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - Use402Format(use402Format) { + Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) { assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } @@ -94,6 +96,7 @@ namespace { bool EmitNotes; bool EmitData; bool Use402Format; + bool UseExtraChecksum; Module *M; LLVMContext *Ctx; @@ -105,8 +108,9 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - bool Use402Format) { - return new GCOVProfiler(EmitNotes, EmitData, Use402Format); + bool Use402Format, + bool UseExtraChecksum) { + return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum); } namespace { @@ -167,7 +171,7 @@ namespace { } uint32_t length() { - // Here 2 = 1 for string lenght + 1 for '0' id#. + // Here 2 = 1 for string length + 1 for '0' id#. 
return lengthOfGCOVString(Filename) + 2 + Lines.size(); } @@ -244,10 +248,12 @@ namespace { // object users can construct, the blocks and lines will be rooted here. class GCOVFunction : public GCOVRecord { public: - GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) { + GCOVFunction(DISubprogram SP, raw_ostream *os, + bool Use402Format, bool UseExtraChecksum) { this->os = os; Function *F = SP.getFunction(); + DEBUG(dbgs() << "Function: " << F->getName() << "\n"); uint32_t i = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { Blocks[BB] = new GCOVBlock(i++, os); @@ -257,14 +263,14 @@ namespace { writeBytes(FunctionTag, 4); uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (!Use402Format) - ++BlockLen; // For second checksum. + if (UseExtraChecksum) + ++BlockLen; write(BlockLen); uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); write(Ident); - write(0); // checksum #1 - if (!Use402Format) - write(0); // checksum #2 + write(0); // lineno checksum + if (UseExtraChecksum) + write(0); // cfg checksum writeGCOVString(SP.getName()); writeGCOVString(SP.getFilename()); write(SP.getLineNumber()); @@ -290,6 +296,7 @@ namespace { for (int i = 0, e = Blocks.size() + 1; i != e; ++i) { write(0); // No flags on our blocks. } + DEBUG(dbgs() << Blocks.size() << " blocks.\n"); // Emit edges between blocks. for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), @@ -301,6 +308,8 @@ namespace { write(Block.OutEdges.size() * 2 + 1); write(Block.Number); for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { + DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number + << "\n"); write(Block.OutEdges[i]->Number); write(0); // no flags } @@ -350,68 +359,60 @@ bool GCOVProfiler::runOnModule(Module &M) { } void GCOVProfiler::emitGCNO() { - DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - if (CU_Nodes) { - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - // Each compile unit gets its own .gcno file. This means that whether we run - // this pass over the original .o's as they're produced, or run it after - // LTO, we'll generate the same .gcno files. 
- - DICompileUnit CU(CU_Nodes->getOperand(i)); - raw_fd_ostream *&out = GcnoFiles[CU]; - std::string ErrorInfo; - out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo, - raw_fd_ostream::F_Binary); - if (!Use402Format) - out->write("oncg*404MVLL", 12); - else - out->write("oncg*204MVLL", 12); - - DIArray SPs = CU.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - if (!SP.Verify()) continue; - raw_fd_ostream *&os = GcnoFiles[CU]; - - Function *F = SP.getFunction(); - if (!F) continue; - GCOVFunction Func(SP, os, Use402Format); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); - TerminatorInst *TI = BB->getTerminator(); - if (int successors = TI->getNumSuccessors()) { - for (int i = 0; i != successors; ++i) { - Block.addEdge(Func.getBlock(TI->getSuccessor(i))); - } - } else if (isa<ReturnInst>(TI)) { - Block.addEdge(Func.getReturnBlock()); - } - - uint32_t Line = 0; - for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { - const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; - if (Line == Loc.getLine()) continue; - Line = Loc.getLine(); - if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; - - GCOVLines &Lines = Block.getFile(SP.getFilename()); - Lines.addLine(Loc.getLine()); + if (!CU_Nodes) return; + + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. + + DICompileUnit CU(CU_Nodes->getOperand(i)); + std::string ErrorInfo; + raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, + raw_fd_ostream::F_Binary); + if (!Use402Format) + out.write("oncg*404MVLL", 12); + else + out.write("oncg*204MVLL", 12); + + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); } + } else if (isa<ReturnInst>(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); + I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); } - Func.writeOut(); } + Func.writeOut(); } - } - - for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator - I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) { - raw_fd_ostream *&out = I->second; - out->write("\0\0\0\0\0\0\0\0", 8); // EOF - out->close(); - delete out; + out.write("\0\0\0\0\0\0\0\0", 8); // EOF + out.close(); } } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 71adc1e..c7266e2 100644 --- 
a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -24,6 +24,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeOptimalEdgeProfilerPass(Registry); initializePathProfilerPass(Registry); initializeGCOVProfilerPass(Registry); + initializeAddressSanitizerPass(Registry); + initializeThreadSanitizerPass(Registry); } /// LLVMInitializeInstrumentation - C binding for diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 62c21b8..1fe1254 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -69,7 +69,7 @@ inline static void printEdgeCounter(ProfileInfo::Edge e, BasicBlock* b, unsigned i) { DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \ - << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n"); + << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n"); } bool OptimalEdgeProfiler::runOnModule(Module &M) { @@ -127,7 +127,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { unsigned i = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n"); + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); // Calculate a Maximum Spanning Tree with the edge weights determined by // ProfileEstimator. ProfileEstimator also assign weights to the virtual diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp index 23915d3..b214796 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -665,7 +665,7 @@ void BLInstrumentationDag::unlinkPhony() { // Generate a .dot graph to represent the DAG and pathNumbers void BLInstrumentationDag::generateDotGraph() { std::string errorInfo; - std::string functionName = getFunction().getNameStr(); + std::string functionName = getFunction().getName().str(); std::string filename = "pathdag." + functionName + ".dot"; DEBUG (dbgs() << "Writing '" << filename << "'...\n"); @@ -750,7 +750,8 @@ Value* BLInstrumentationNode::getStartingPathNumber(){ // Sets the Value of the pathNumber. Used by the instrumentation code. void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) { DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ? - pathNumber->getNameStr() : "unused") << "\n"); + pathNumber->getName() : + "unused") << "\n"); _startingPathNumber = pathNumber; } @@ -760,7 +761,7 @@ Value* BLInstrumentationNode::getEndingPathNumber(){ void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) { DEBUG(dbgs() << " EPN-" << getName() << " <-- " - << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n"); + << (pathNumber ? pathNumber->getName() : "unused") << "\n"); _endingPathNumber = pathNumber; } @@ -1239,9 +1240,9 @@ void PathProfiler::insertInstrumentation( insertPoint++; DEBUG(dbgs() << "\nInstrumenting method call block '" - << node->getBlock()->getNameStr() << "'\n"); + << node->getBlock()->getName() << "'\n"); DEBUG(dbgs() << " Path number initialized: " - << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n"); + << ((node->getStartingPathNumber()) ? 
"yes" : "no") << "\n"); Value* newpn; if( node->getStartingPathNumber() ) { @@ -1370,7 +1371,7 @@ bool PathProfiler::runOnModule(Module &M) { if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n"); + DEBUG(dbgs() << "Function: " << F->getName() << "\n"); functionNumber++; // set function number diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp new file mode 100644 index 0000000..8bb337e --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -0,0 +1,311 @@ +//===-- ThreadSanitizer.cpp - race detector -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of ThreadSanitizer, a race detector. +// +// The tool is under development, for the details about previous versions see +// http://code.google.com/p/data-race-test +// +// The instrumentation phase is quite simple: +// - Insert calls to run-time library before every memory access. +// - Optimizations may apply to avoid instrumenting some of the accesses. +// - Insert calls at function entry/exit. +// The rest is handled by the run-time library. +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "tsan" + +#include "FunctionBlackList.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Intrinsics.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Type.h" + +using namespace llvm; + +static cl::opt<std::string> ClBlackListFile("tsan-blacklist", + cl::desc("Blacklist file"), cl::Hidden); + +static cl::opt<bool> ClPrintStats("tsan-print-stats", + cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden); + +namespace { + +// Stats counters for ThreadSanitizer instrumentation. +struct ThreadSanitizerStats { + size_t NumInstrumentedReads; + size_t NumInstrumentedWrites; + size_t NumOmittedReadsBeforeWrite; + size_t NumAccessesWithBadSize; + size_t NumInstrumentedVtableWrites; + size_t NumOmittedReadsFromConstantGlobals; + size_t NumOmittedReadsFromVtable; +}; + +/// ThreadSanitizer: instrument the code in module to find races. +struct ThreadSanitizer : public FunctionPass { + ThreadSanitizer(); + bool runOnFunction(Function &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + bool instrumentLoadOrStore(Instruction *I); + static char ID; // Pass identification, replacement for typeid. + + private: + void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, + SmallVectorImpl<Instruction*> &All); + bool addrPointsToConstantData(Value *Addr); + + TargetData *TD; + OwningPtr<FunctionBlackList> BL; + // Callbacks to run-time library are computed in doInitialization. 
+ Value *TsanFuncEntry; + Value *TsanFuncExit; + // Accesses sizes are powers of two: 1, 2, 4, 8, 16. + static const size_t kNumberOfAccessSizes = 5; + Value *TsanRead[kNumberOfAccessSizes]; + Value *TsanWrite[kNumberOfAccessSizes]; + Value *TsanVptrUpdate; + + // Stats are modified w/o synchronization. + ThreadSanitizerStats stats; +}; +} // namespace + +char ThreadSanitizer::ID = 0; +INITIALIZE_PASS(ThreadSanitizer, "tsan", + "ThreadSanitizer: detects data races.", + false, false) + +ThreadSanitizer::ThreadSanitizer() + : FunctionPass(ID), + TD(NULL) { +} + +FunctionPass *llvm::createThreadSanitizerPass() { + return new ThreadSanitizer(); +} + +bool ThreadSanitizer::doInitialization(Module &M) { + TD = getAnalysisIfAvailable<TargetData>(); + if (!TD) + return false; + BL.reset(new FunctionBlackList(ClBlackListFile)); + memset(&stats, 0, sizeof(stats)); + + // Always insert a call to __tsan_init into the module's CTORs. + IRBuilder<> IRB(M.getContext()); + Value *TsanInit = M.getOrInsertFunction("__tsan_init", + IRB.getVoidTy(), NULL); + appendToGlobalCtors(M, cast<Function>(TsanInit), 0); + + // Initialize the callbacks. + TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(), + IRB.getInt8PtrTy(), NULL); + TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), + NULL); + for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { + SmallString<32> ReadName("__tsan_read"); + ReadName += itostr(1 << i); + TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(), + IRB.getInt8PtrTy(), NULL); + SmallString<32> WriteName("__tsan_write"); + WriteName += itostr(1 << i); + TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(), + IRB.getInt8PtrTy(), NULL); + } + TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + NULL); + return true; +} + +bool ThreadSanitizer::doFinalization(Module &M) { + if (ClPrintStats) { + errs() << "ThreadSanitizerStats " << M.getModuleIdentifier() + << ": wr " << stats.NumInstrumentedWrites + << "; rd " << stats.NumInstrumentedReads + << "; vt " << stats.NumInstrumentedVtableWrites + << "; bs " << stats.NumAccessesWithBadSize + << "; rbw " << stats.NumOmittedReadsBeforeWrite + << "; rcg " << stats.NumOmittedReadsFromConstantGlobals + << "; rvt " << stats.NumOmittedReadsFromVtable + << "\n"; + } + return true; +} + +static bool isVtableAccess(Instruction *I) { + if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { + if (Tag->getNumOperands() < 1) return false; + if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + } + return false; +} + +bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { + // If this is a GEP, just analyze its pointer operand. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) + Addr = GEP->getPointerOperand(); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { + if (GV->isConstant()) { + // Reads from constant globals can not race with any writes. + stats.NumOmittedReadsFromConstantGlobals++; + return true; + } + } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) { + if (isVtableAccess(L)) { + // Reads from a vtable pointer can not race with any writes. + stats.NumOmittedReadsFromVtable++; + return true; + } + } + return false; +} + +// Instrumenting some of the accesses may be proven redundant. 
+// Currently handled: +// - read-before-write (within same BB, no calls between) +// +// We do not handle some of the patterns that should not survive +// after the classic compiler optimizations. +// E.g. two reads from the same temp should be eliminated by CSE, +// two writes should be eliminated by DSE, etc. +// +// 'Local' is a vector of insns within the same BB (no calls between). +// 'All' is a vector of insns that will be instrumented. +void ThreadSanitizer::choseInstructionsToInstrument( + SmallVectorImpl<Instruction*> &Local, + SmallVectorImpl<Instruction*> &All) { + SmallSet<Value*, 8> WriteTargets; + // Iterate from the end. + for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), + E = Local.rend(); It != E; ++It) { + Instruction *I = *It; + if (StoreInst *Store = dyn_cast<StoreInst>(I)) { + WriteTargets.insert(Store->getPointerOperand()); + } else { + LoadInst *Load = cast<LoadInst>(I); + Value *Addr = Load->getPointerOperand(); + if (WriteTargets.count(Addr)) { + // We will write to this temp, so no reason to analyze the read. + stats.NumOmittedReadsBeforeWrite++; + continue; + } + if (addrPointsToConstantData(Addr)) { + // Addr points to some constant data -- it can not race with any writes. + continue; + } + } + All.push_back(I); + } + Local.clear(); +} + +bool ThreadSanitizer::runOnFunction(Function &F) { + if (!TD) return false; + if (BL->isIn(F)) return false; + SmallVector<Instruction*, 8> RetVec; + SmallVector<Instruction*, 8> AllLoadsAndStores; + SmallVector<Instruction*, 8> LocalLoadsAndStores; + bool Res = false; + bool HasCalls = false; + + // Traverse all instructions, collect loads/stores/returns, check for calls. + for (Function::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) { + BasicBlock &BB = *FI; + for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); + BI != BE; ++BI) { + if (isa<LoadInst>(BI) || isa<StoreInst>(BI)) + LocalLoadsAndStores.push_back(BI); + else if (isa<ReturnInst>(BI)) + RetVec.push_back(BI); + else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { + HasCalls = true; + choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + } + } + choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + } + + // We have collected all loads and stores. + // FIXME: many of these accesses do not need to be checked for races + // (e.g. variables that do not escape, etc). + + // Instrument memory accesses. + for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { + Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); + } + + // Instrument function entry/exit points if there were instrumented accesses. + if (Res || HasCalls) { + IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); + Value *ReturnAddress = IRB.CreateCall( + Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), + IRB.getInt32(0)); + IRB.CreateCall(TsanFuncEntry, ReturnAddress); + for (size_t i = 0, n = RetVec.size(); i < n; ++i) { + IRBuilder<> IRBRet(RetVec[i]); + IRBRet.CreateCall(TsanFuncExit); + } + Res = true; + } + return Res; +} + +bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { + IRBuilder<> IRB(I); + bool IsWrite = isa<StoreInst>(*I); + Value *Addr = IsWrite + ? 
cast<StoreInst>(I)->getPointerOperand() + : cast<LoadInst>(I)->getPointerOperand(); + Type *OrigPtrTy = Addr->getType(); + Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); + assert(OrigTy->isSized()); + uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); + if (TypeSize != 8 && TypeSize != 16 && + TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { + stats.NumAccessesWithBadSize++; + // Ignore all unusual sizes. + return false; + } + if (IsWrite && isVtableAccess(I)) { + Value *StoredValue = cast<StoreInst>(I)->getValueOperand(); + IRB.CreateCall2(TsanVptrUpdate, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())); + stats.NumInstrumentedVtableWrites++; + return true; + } + size_t Idx = CountTrailingZeros_32(TypeSize / 8); + assert(Idx < kNumberOfAccessSizes); + Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; + IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); + if (IsWrite) stats.NumInstrumentedWrites++; + else stats.NumInstrumentedReads++; + return true; +} |
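
To summarize the scheme the new AddressSanitizer pass implements, the following plain C++ sketch shows roughly what instrumentAddress() inlines before each load or store. The constants mirror this commit's 64-bit defaults (scale 3, offset 1ULL << 44); the helper names MemToShadow and CheckAccess are illustrative only, and the generated IR combines the offset with an OR rather than the addition used here.

#include <cstdint>
#include <cstdlib>

static const uint64_t kShadowScale  = 3;           // kDefaultShadowScale
static const uint64_t kShadowOffset = 1ULL << 44;  // kDefaultShadowOffset64

static inline int8_t *MemToShadow(uintptr_t Addr) {
  // The pass emits (Addr >> scale) | offset; for the application ranges the
  // run-time maps, that produces the same value as this addition.
  return reinterpret_cast<int8_t *>((Addr >> kShadowScale) + kShadowOffset);
}

// Check an AccessSize-byte access (AccessSize <= 8) at Addr.
static inline void CheckAccess(uintptr_t Addr, size_t AccessSize) {
  int8_t ShadowValue = *MemToShadow(Addr);
  if (ShadowValue != 0) {
    // A shadow byte k with 0 < k < 8 means only the first k bytes of this
    // 8-byte granule are addressable, so a short access may still be valid.
    int8_t LastAccessedByte =
        static_cast<int8_t>((Addr & ((1 << kShadowScale) - 1)) + AccessSize - 1);
    if (AccessSize >= (1u << kShadowScale) || LastAccessedByte >= ShadowValue)
      std::abort();  // stands in for the noreturn __asan_report_{load,store}N call
  }
}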
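
For globals, insertGlobalRedzones() replaces each instrumented variable with a struct carrying a trailing redzone and registers a descriptor with the run-time. As a rough illustration only (the rewrite really happens on LLVM IR, the struct and field names below are hypothetical, and the sizes assume the default 32-byte RedzoneSize), a 4-byte global int ends up laid out like this:

// Before:  int g = 42;
// After the pass (conceptual C++ view):
struct GWithRedZone {            // hypothetical name
  int Original;                  // old initializer preserved (42)
  char RightRedZone[60];         // zero-filled; 4 + 60 = 64 bytes, a multiple
};                               // of the 32-byte RedzoneSize
// One descriptor per global, matching GlobalStructTy, goes into an array that
// the module ctor hands to __asan_register_globals(array, n):
//   { beg               = (uintptr_t)&g,
//     size              = 4,
//     size_with_redzone = 64,
//     name              = (uintptr_t)"g (ModuleName)" }
// asan.module_dtor calls __asan_unregister_globals with the same array when
// the module (e.g. a shared library) is unloaded.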
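
The shared FunctionBlackList helper turns each "fun:" line of the blacklist file into a regex fragment (every '*' expanded to ".*") and joins the fragments with '|'. For example, a file with the following hypothetical contents:

---
fun:main
fun:*Foo*
---

is compiled into the single regular expression "main|.*Foo.*"; FunctionBlackList::isIn() reports a function as blacklisted when that regex matches its name, so AddressSanitizer and ThreadSanitizer skip instrumenting it. A pattern that does not form a valid regex stops compilation with the "malformed blacklist regex" fatal error shown above.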
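
Finally, a source-level sketch of what the new ThreadSanitizer pass produces. The pass works on IR and copy_elem is made up for illustration, but the __tsan_* callback names and their pointer arguments mirror the ones created in doInitialization(): each load or store kept by choseInstructionsToInstrument() gets a size-specific callback in front of it, vtable-pointer stores go to __tsan_vptr_update instead, and an instrumented function is bracketed with __tsan_func_entry(return address) and __tsan_func_exit() before every return.

// Hypothetical function; the extern "C" declarations mirror the callbacks
// registered in ThreadSanitizer::doInitialization().
extern "C" void __tsan_func_entry(void *ReturnAddress);
extern "C" void __tsan_func_exit();
extern "C" void __tsan_read4(void *Addr);
extern "C" void __tsan_write8(void *Addr);

long copy_elem(int *Src, long *Dst) {
  __tsan_func_entry(__builtin_return_address(0));  // Intrinsic::returnaddress
  __tsan_read4(Src);    // 4-byte load of *Src  -> TsanRead[2]
  long V = *Src;
  __tsan_write8(Dst);   // 8-byte store to *Dst -> TsanWrite[3]
  *Dst = V;
  __tsan_func_exit();   // inserted before every return instruction
  return V;
}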