diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 1623 |
1 files changed, 1623 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp new file mode 100644 index 0000000..8f24476 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -0,0 +1,1623 @@ +//===-- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow +/// analysis. +/// +/// Unlike other Sanitizer tools, this tool is not designed to detect a specific +/// class of bugs on its own. Instead, it provides a generic dynamic data flow +/// analysis framework to be used by clients to help detect application-specific +/// issues within their own code. +/// +/// The analysis is based on automatic propagation of data flow labels (also +/// known as taint labels) through a program as it performs computation. Each +/// byte of application memory is backed by two bytes of shadow memory which +/// hold the label. On Linux/x86_64, memory is laid out as follows: +/// +/// +--------------------+ 0x800000000000 (top of memory) +/// | application memory | +/// +--------------------+ 0x700000008000 (kAppAddr) +/// | | +/// | unused | +/// | | +/// +--------------------+ 0x200200000000 (kUnusedAddr) +/// | union table | +/// +--------------------+ 0x200000000000 (kUnionTableAddr) +/// | shadow memory | +/// +--------------------+ 0x000000010000 (kShadowAddr) +/// | reserved by kernel | +/// +--------------------+ 0x000000000000 +/// +/// To derive a shadow memory address from an application memory address, +/// bits 44-46 are cleared to bring the address into the range +/// [0x000000008000,0x100000000000). 
/// Then the address is shifted left by 1 to
/// account for the double byte representation of shadow labels and move the
/// address into the shadow memory range.  See the function
/// DataFlowSanitizer::getShadowAddress below.
///
/// For more information, please refer to the design document:
/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html

#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <iterator>
#include <set>
#include <utility>

using namespace llvm;

// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct.  For example,
// if the input IR contains a load with alignment 8, this flag will cause
// the shadow load to have alignment 16.  This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list file controls how shadow parameters are passed.
The pass treats +// every function labelled "uninstrumented" in the ABI list file as conforming +// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains +// additional annotations for those functions, a call to one of those functions +// will produce a warning message, as the labelling behaviour of the function is +// unknown. The other supported annotations are "functional" and "discard", +// which are described below under DataFlowSanitizer::WrapperKind. +static cl::opt<std::string> ClABIListFile( + "dfsan-abilist", + cl::desc("File listing native ABI functions and how the pass treats them"), + cl::Hidden); + +// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented +// functions (see DataFlowSanitizer::InstrumentedABI below). +static cl::opt<bool> ClArgsABI( + "dfsan-args-abi", + cl::desc("Use the argument ABI rather than the TLS ABI"), + cl::Hidden); + +// Controls whether the pass includes or ignores the labels of pointers in load +// instructions. +static cl::opt<bool> ClCombinePointerLabelsOnLoad( + "dfsan-combine-pointer-labels-on-load", + cl::desc("Combine the label of the pointer with the label of the data when " + "loading from memory."), + cl::Hidden, cl::init(true)); + +// Controls whether the pass includes or ignores the labels of pointers in +// stores instructions. +static cl::opt<bool> ClCombinePointerLabelsOnStore( + "dfsan-combine-pointer-labels-on-store", + cl::desc("Combine the label of the pointer with the label of the data when " + "storing in memory."), + cl::Hidden, cl::init(false)); + +static cl::opt<bool> ClDebugNonzeroLabels( + "dfsan-debug-nonzero-labels", + cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " + "load or return with a nonzero label"), + cl::Hidden); + +namespace { + +StringRef GetGlobalTypeString(const GlobalValue &G) { + // Types of GlobalVariables are always pointer types. 
+ Type *GType = G.getType()->getElementType(); + // For now we support blacklisting struct types only. + if (StructType *SGType = dyn_cast<StructType>(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return "<unknown type>"; +} + +class DFSanABIList { + std::unique_ptr<SpecialCaseList> SCL; + + public: + DFSanABIList(std::unique_ptr<SpecialCaseList> SCL) : SCL(std::move(SCL)) {} + + /// Returns whether either this function or its source file are listed in the + /// given category. + bool isIn(const Function &F, StringRef Category) const { + return isIn(*F.getParent(), Category) || + SCL->inSection("fun", F.getName(), Category); + } + + /// Returns whether this global alias is listed in the given category. + /// + /// If GA aliases a function, the alias's name is matched as a function name + /// would be. Similarly, aliases of globals are matched like globals. + bool isIn(const GlobalAlias &GA, StringRef Category) const { + if (isIn(*GA.getParent(), Category)) + return true; + + if (isa<FunctionType>(GA.getType()->getElementType())) + return SCL->inSection("fun", GA.getName(), Category); + + return SCL->inSection("global", GA.getName(), Category) || + SCL->inSection("type", GetGlobalTypeString(GA), Category); + } + + /// Returns whether this module is listed in the given category. + bool isIn(const Module &M, StringRef Category) const { + return SCL->inSection("src", M.getModuleIdentifier(), Category); + } +}; + +class DataFlowSanitizer : public ModulePass { + friend struct DFSanFunction; + friend class DFSanVisitor; + + enum { + ShadowWidth = 16 + }; + + /// Which ABI should be used for instrumented functions? + enum InstrumentedABI { + /// Argument and return value labels are passed through additional + /// arguments and by modifying the return type. + IA_Args, + + /// Argument and return value labels are passed through TLS variables + /// __dfsan_arg_tls and __dfsan_retval_tls. 
+ IA_TLS + }; + + /// How should calls to uninstrumented functions be handled? + enum WrapperKind { + /// This function is present in an uninstrumented form but we don't know + /// how it should be handled. Print a warning and call the function anyway. + /// Don't label the return value. + WK_Warning, + + /// This function does not write to (user-accessible) memory, and its return + /// value is unlabelled. + WK_Discard, + + /// This function does not write to (user-accessible) memory, and the label + /// of its return value is the union of the label of its arguments. + WK_Functional, + + /// Instead of calling the function, a custom wrapper __dfsw_F is called, + /// where F is the name of the function. This function may wrap the + /// original function or provide its own implementation. This is similar to + /// the IA_Args ABI, except that IA_Args uses a struct return type to + /// pass the return value shadow in a register, while WK_Custom uses an + /// extra pointer argument to return the shadow. This allows the wrapped + /// form of the function type to be expressed in C. 
+ WK_Custom + }; + + const DataLayout *DL; + Module *Mod; + LLVMContext *Ctx; + IntegerType *ShadowTy; + PointerType *ShadowPtrTy; + IntegerType *IntptrTy; + ConstantInt *ZeroShadow; + ConstantInt *ShadowPtrMask; + ConstantInt *ShadowPtrMul; + Constant *ArgTLS; + Constant *RetvalTLS; + void *(*GetArgTLSPtr)(); + void *(*GetRetvalTLSPtr)(); + Constant *GetArgTLS; + Constant *GetRetvalTLS; + FunctionType *DFSanUnionFnTy; + FunctionType *DFSanUnionLoadFnTy; + FunctionType *DFSanUnimplementedFnTy; + FunctionType *DFSanSetLabelFnTy; + FunctionType *DFSanNonzeroLabelFnTy; + FunctionType *DFSanVarargWrapperFnTy; + Constant *DFSanUnionFn; + Constant *DFSanCheckedUnionFn; + Constant *DFSanUnionLoadFn; + Constant *DFSanUnimplementedFn; + Constant *DFSanSetLabelFn; + Constant *DFSanNonzeroLabelFn; + Constant *DFSanVarargWrapperFn; + MDNode *ColdCallWeights; + DFSanABIList ABIList; + DenseMap<Value *, Function *> UnwrappedFnMap; + AttributeSet ReadOnlyNoneAttrs; + DenseMap<const Function *, DISubprogram> FunctionDIs; + + Value *getShadowAddress(Value *Addr, Instruction *Pos); + bool isInstrumented(const Function *F); + bool isInstrumented(const GlobalAlias *GA); + FunctionType *getArgsFunctionType(FunctionType *T); + FunctionType *getTrampolineFunctionType(FunctionType *T); + FunctionType *getCustomFunctionType(FunctionType *T); + InstrumentedABI getInstrumentedABI(); + WrapperKind getWrapperKind(Function *F); + void addGlobalNamePrefix(GlobalValue *GV); + Function *buildWrapperFunction(Function *F, StringRef NewFName, + GlobalValue::LinkageTypes NewFLink, + FunctionType *NewFT); + Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); + + public: + DataFlowSanitizer(StringRef ABIListFile = StringRef(), + void *(*getArgTLS)() = nullptr, + void *(*getRetValTLS)() = nullptr); + static char ID; + bool doInitialization(Module &M) override; + bool runOnModule(Module &M) override; +}; + +struct DFSanFunction { + DataFlowSanitizer &DFS; + Function *F; + 
DominatorTree DT; + DataFlowSanitizer::InstrumentedABI IA; + bool IsNativeABI; + Value *ArgTLSPtr; + Value *RetvalTLSPtr; + AllocaInst *LabelReturnAlloca; + DenseMap<Value *, Value *> ValShadowMap; + DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap; + std::vector<std::pair<PHINode *, PHINode *> > PHIFixups; + DenseSet<Instruction *> SkipInsts; + std::vector<Value *> NonZeroChecks; + bool AvoidNewBlocks; + + struct CachedCombinedShadow { + BasicBlock *Block; + Value *Shadow; + }; + DenseMap<std::pair<Value *, Value *>, CachedCombinedShadow> + CachedCombinedShadows; + DenseMap<Value *, std::set<Value *>> ShadowElements; + + DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) + : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), + IsNativeABI(IsNativeABI), ArgTLSPtr(nullptr), RetvalTLSPtr(nullptr), + LabelReturnAlloca(nullptr) { + DT.recalculate(*F); + // FIXME: Need to track down the register allocator issue which causes poor + // performance in pathological cases with large numbers of basic blocks. 
+ AvoidNewBlocks = F->size() > 1000; + } + Value *getArgTLSPtr(); + Value *getArgTLS(unsigned Index, Instruction *Pos); + Value *getRetvalTLS(); + Value *getShadow(Value *V); + void setShadow(Instruction *I, Value *Shadow); + Value *combineShadows(Value *V1, Value *V2, Instruction *Pos); + Value *combineOperandShadows(Instruction *Inst); + Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align, + Instruction *Pos); + void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow, + Instruction *Pos); +}; + +class DFSanVisitor : public InstVisitor<DFSanVisitor> { + public: + DFSanFunction &DFSF; + DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} + + void visitOperandShadowInst(Instruction &I); + + void visitBinaryOperator(BinaryOperator &BO); + void visitCastInst(CastInst &CI); + void visitCmpInst(CmpInst &CI); + void visitGetElementPtrInst(GetElementPtrInst &GEPI); + void visitLoadInst(LoadInst &LI); + void visitStoreInst(StoreInst &SI); + void visitReturnInst(ReturnInst &RI); + void visitCallSite(CallSite CS); + void visitPHINode(PHINode &PN); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitShuffleVectorInst(ShuffleVectorInst &I); + void visitExtractValueInst(ExtractValueInst &I); + void visitInsertValueInst(InsertValueInst &I); + void visitAllocaInst(AllocaInst &I); + void visitSelectInst(SelectInst &I); + void visitMemSetInst(MemSetInst &I); + void visitMemTransferInst(MemTransferInst &I); +}; + +} + +char DataFlowSanitizer::ID; +INITIALIZE_PASS(DataFlowSanitizer, "dfsan", + "DataFlowSanitizer: dynamic data flow analysis.", false, false) + +ModulePass *llvm::createDataFlowSanitizerPass(StringRef ABIListFile, + void *(*getArgTLS)(), + void *(*getRetValTLS)()) { + return new DataFlowSanitizer(ABIListFile, getArgTLS, getRetValTLS); +} + +DataFlowSanitizer::DataFlowSanitizer(StringRef ABIListFile, + void *(*getArgTLS)(), + void *(*getRetValTLS)()) + : ModulePass(ID), 
GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), + ABIList(SpecialCaseList::createOrDie(ABIListFile.empty() ? ClABIListFile + : ABIListFile)) { +} + +FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { + llvm::SmallVector<Type *, 4> ArgTypes; + std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes)); + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(ShadowTy); + if (T->isVarArg()) + ArgTypes.push_back(ShadowPtrTy); + Type *RetType = T->getReturnType(); + if (!RetType->isVoidTy()) + RetType = StructType::get(RetType, ShadowTy, (Type *)nullptr); + return FunctionType::get(RetType, ArgTypes, T->isVarArg()); +} + +FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { + assert(!T->isVarArg()); + llvm::SmallVector<Type *, 4> ArgTypes; + ArgTypes.push_back(T->getPointerTo()); + std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes)); + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(ShadowTy); + Type *RetType = T->getReturnType(); + if (!RetType->isVoidTy()) + ArgTypes.push_back(ShadowPtrTy); + return FunctionType::get(T->getReturnType(), ArgTypes, false); +} + +FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { + llvm::SmallVector<Type *, 4> ArgTypes; + for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end(); + i != e; ++i) { + FunctionType *FT; + if (isa<PointerType>(*i) && (FT = dyn_cast<FunctionType>(cast<PointerType>( + *i)->getElementType()))) { + ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo()); + ArgTypes.push_back(Type::getInt8PtrTy(*Ctx)); + } else { + ArgTypes.push_back(*i); + } + } + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(ShadowTy); + if (T->isVarArg()) + ArgTypes.push_back(ShadowPtrTy); + Type *RetType = T->getReturnType(); + if (!RetType->isVoidTy()) + ArgTypes.push_back(ShadowPtrTy); + return 
FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()); +} + +bool DataFlowSanitizer::doInitialization(Module &M) { + llvm::Triple TargetTriple(M.getTargetTriple()); + bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; + bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || + TargetTriple.getArch() == llvm::Triple::mips64el; + + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + if (!DLP) + report_fatal_error("data layout missing"); + DL = &DLP->getDataLayout(); + + Mod = &M; + Ctx = &M.getContext(); + ShadowTy = IntegerType::get(*Ctx, ShadowWidth); + ShadowPtrTy = PointerType::getUnqual(ShadowTy); + IntptrTy = DL->getIntPtrType(*Ctx); + ZeroShadow = ConstantInt::getSigned(ShadowTy, 0); + ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8); + if (IsX86_64) + ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); + else if (IsMIPS64) + ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL); + else + report_fatal_error("unsupported triple"); + + Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy }; + DFSanUnionFnTy = + FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false); + Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy }; + DFSanUnionLoadFnTy = + FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false); + DFSanUnimplementedFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); + Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy }; + DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), + DFSanSetLabelArgs, /*isVarArg=*/false); + DFSanNonzeroLabelFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), None, /*isVarArg=*/false); + DFSanVarargWrapperFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); + + if (GetArgTLSPtr) { + Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); + ArgTLS = nullptr; + GetArgTLS = ConstantExpr::getIntToPtr( + 
ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)), + PointerType::getUnqual( + FunctionType::get(PointerType::getUnqual(ArgTLSTy), + (Type *)nullptr))); + } + if (GetRetvalTLSPtr) { + RetvalTLS = nullptr; + GetRetvalTLS = ConstantExpr::getIntToPtr( + ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)), + PointerType::getUnqual( + FunctionType::get(PointerType::getUnqual(ShadowTy), + (Type *)nullptr))); + } + + ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); + return true; +} + +bool DataFlowSanitizer::isInstrumented(const Function *F) { + return !ABIList.isIn(*F, "uninstrumented"); +} + +bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) { + return !ABIList.isIn(*GA, "uninstrumented"); +} + +DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { + return ClArgsABI ? IA_Args : IA_TLS; +} + +DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { + if (ABIList.isIn(*F, "functional")) + return WK_Functional; + if (ABIList.isIn(*F, "discard")) + return WK_Discard; + if (ABIList.isIn(*F, "custom")) + return WK_Custom; + + return WK_Warning; +} + +void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) { + std::string GVName = GV->getName(), Prefix = "dfs$"; + GV->setName(Prefix + GVName); + + // Try to change the name of the function in module inline asm. We only do + // this for specific asm directives, currently only ".symver", to try to avoid + // corrupting asm which happens to contain the symbol name as a substring. + // Note that the substitution for .symver assumes that the versioned symbol + // also has an instrumented name. 
+ std::string Asm = GV->getParent()->getModuleInlineAsm(); + std::string SearchStr = ".symver " + GVName + ","; + size_t Pos = Asm.find(SearchStr); + if (Pos != std::string::npos) { + Asm.replace(Pos, SearchStr.size(), + ".symver " + Prefix + GVName + "," + Prefix); + GV->getParent()->setModuleInlineAsm(Asm); + } +} + +Function * +DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, + GlobalValue::LinkageTypes NewFLink, + FunctionType *NewFT) { + FunctionType *FT = F->getFunctionType(); + Function *NewF = Function::Create(NewFT, NewFLink, NewFName, + F->getParent()); + NewF->copyAttributesFrom(F); + NewF->removeAttributes( + AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NewFT->getReturnType(), + AttributeSet::ReturnIndex)); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); + if (F->isVarArg()) { + NewF->removeAttributes( + AttributeSet::FunctionIndex, + AttributeSet().addAttribute(*Ctx, AttributeSet::FunctionIndex, + "split-stack")); + CallInst::Create(DFSanVarargWrapperFn, + IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", + BB); + new UnreachableInst(*Ctx, BB); + } else { + std::vector<Value *> Args; + unsigned n = FT->getNumParams(); + for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n) + Args.push_back(&*ai); + CallInst *CI = CallInst::Create(F, Args, "", BB); + if (FT->getReturnType()->isVoidTy()) + ReturnInst::Create(*Ctx, BB); + else + ReturnInst::Create(*Ctx, CI, BB); + } + + return NewF; +} + +Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, + StringRef FName) { + FunctionType *FTT = getTrampolineFunctionType(FT); + Constant *C = Mod->getOrInsertFunction(FName, FTT); + Function *F = dyn_cast<Function>(C); + if (F && F->isDeclaration()) { + F->setLinkage(GlobalValue::LinkOnceODRLinkage); + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + std::vector<Value *> Args; + Function::arg_iterator AI = F->arg_begin(); ++AI; + for (unsigned N = 
FT->getNumParams(); N != 0; ++AI, --N) + Args.push_back(&*AI); + CallInst *CI = + CallInst::Create(&F->getArgumentList().front(), Args, "", BB); + ReturnInst *RI; + if (FT->getReturnType()->isVoidTy()) + RI = ReturnInst::Create(*Ctx, BB); + else + RI = ReturnInst::Create(*Ctx, CI, BB); + + DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); + Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; + for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) + DFSF.ValShadowMap[ValAI] = ShadowAI; + DFSanVisitor(DFSF).visitCallInst(*CI); + if (!FT->getReturnType()->isVoidTy()) + new StoreInst(DFSF.getShadow(RI->getReturnValue()), + &F->getArgumentList().back(), RI); + } + + return C; +} + +bool DataFlowSanitizer::runOnModule(Module &M) { + if (!DL) + return false; + + if (ABIList.isIn(M, "skip")) + return false; + + FunctionDIs = makeSubprogramMap(M); + + if (!GetArgTLSPtr) { + Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); + ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); + if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS)) + G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + if (!GetRetvalTLSPtr) { + RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy); + if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS)) + G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + + DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy); + if (Function *F = dyn_cast<Function>(DFSanUnionFn)) { + F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); + F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); + F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + F->addAttribute(1, Attribute::ZExt); + F->addAttribute(2, Attribute::ZExt); + } + DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy); + if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) { + F->addAttribute(AttributeSet::FunctionIndex, 
Attribute::NoUnwind); + F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); + F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + F->addAttribute(1, Attribute::ZExt); + F->addAttribute(2, Attribute::ZExt); + } + DFSanUnionLoadFn = + Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy); + if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) { + F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); + F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly); + F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + } + DFSanUnimplementedFn = + Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); + DFSanSetLabelFn = + Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy); + if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) { + F->addAttribute(1, Attribute::ZExt); + } + DFSanNonzeroLabelFn = + Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); + DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper", + DFSanVarargWrapperFnTy); + + std::vector<Function *> FnsToInstrument; + llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI; + for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) { + if (!i->isIntrinsic() && + i != DFSanUnionFn && + i != DFSanCheckedUnionFn && + i != DFSanUnionLoadFn && + i != DFSanUnimplementedFn && + i != DFSanSetLabelFn && + i != DFSanNonzeroLabelFn && + i != DFSanVarargWrapperFn) + FnsToInstrument.push_back(&*i); + } + + // Give function aliases prefixes when necessary, and build wrappers where the + // instrumentedness is inconsistent. + for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) { + GlobalAlias *GA = &*i; + ++i; + // Don't stop on weak. We assume people aren't playing games with the + // instrumentedness of overridden weak aliases. 
+ if (auto F = dyn_cast<Function>(GA->getBaseObject())) { + bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); + if (GAInst && FInst) { + addGlobalNamePrefix(GA); + } else if (GAInst != FInst) { + // Non-instrumented alias of an instrumented function, or vice versa. + // Replace the alias with a native-ABI wrapper of the aliasee. The pass + // below will take care of instrumenting it. + Function *NewF = + buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); + GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType())); + NewF->takeName(GA); + GA->eraseFromParent(); + FnsToInstrument.push_back(NewF); + } + } + } + + AttrBuilder B; + B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); + ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B); + + // First, change the ABI of every function in the module. ABI-listed + // functions keep their original ABI and get a wrapper function. + for (std::vector<Function *>::iterator i = FnsToInstrument.begin(), + e = FnsToInstrument.end(); + i != e; ++i) { + Function &F = **i; + FunctionType *FT = F.getFunctionType(); + + bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() && + FT->getReturnType()->isVoidTy()); + + if (isInstrumented(&F)) { + // Instrumented functions get a 'dfs$' prefix. This allows us to more + // easily identify cases of mismatching ABIs. 
+ if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) { + FunctionType *NewFT = getArgsFunctionType(FT); + Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M); + NewF->copyAttributesFrom(&F); + NewF->removeAttributes( + AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NewFT->getReturnType(), + AttributeSet::ReturnIndex)); + for (Function::arg_iterator FArg = F.arg_begin(), + NewFArg = NewF->arg_begin(), + FArgEnd = F.arg_end(); + FArg != FArgEnd; ++FArg, ++NewFArg) { + FArg->replaceAllUsesWith(NewFArg); + } + NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); + + for (Function::user_iterator UI = F.user_begin(), UE = F.user_end(); + UI != UE;) { + BlockAddress *BA = dyn_cast<BlockAddress>(*UI); + ++UI; + if (BA) { + BA->replaceAllUsesWith( + BlockAddress::get(NewF, BA->getBasicBlock())); + delete BA; + } + } + F.replaceAllUsesWith( + ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT))); + NewF->takeName(&F); + F.eraseFromParent(); + *i = NewF; + addGlobalNamePrefix(NewF); + } else { + addGlobalNamePrefix(&F); + } + } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) { + // Build a wrapper function for F. The wrapper simply calls F, and is + // added to FnsToInstrument so that any instrumentation according to its + // WrapperKind is done in the second pass below. + FunctionType *NewFT = getInstrumentedABI() == IA_Args + ? getArgsFunctionType(FT) + : FT; + Function *NewF = buildWrapperFunction( + &F, std::string("dfsw$") + std::string(F.getName()), + GlobalValue::LinkOnceODRLinkage, NewFT); + if (getInstrumentedABI() == IA_TLS) + NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs); + + Value *WrappedFnCst = + ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); + F.replaceAllUsesWith(WrappedFnCst); + + // Patch the pointer to LLVM function in debug info descriptor. 
      auto DI = FunctionDIs.find(&F);
      if (DI != FunctionDIs.end())
        DI->second.replaceFunction(&F);

      UnwrappedFnMap[WrappedFnCst] = &F;
      *i = NewF;

      if (!F.isDeclaration()) {
        // This function is probably defining an interposition of an
        // uninstrumented function and hence needs to keep the original ABI.
        // But any functions it may call need to use the instrumented ABI, so
        // we instrument it in a mode which preserves the original ABI.
        FnsWithNativeABI.insert(&F);

        // This code needs to rebuild the iterators, as they may be invalidated
        // by the push_back, taking care that the new range does not include
        // any functions added by this code.
        size_t N = i - FnsToInstrument.begin(),
               Count = e - FnsToInstrument.begin();
        FnsToInstrument.push_back(&F);
        i = FnsToInstrument.begin() + N;
        e = FnsToInstrument.begin() + Count;
      }
      // Hopefully, nobody will try to indirectly call a vararg
      // function... yet.
    } else if (FT->isVarArg()) {
      UnwrappedFnMap[&F] = &F;
      *i = nullptr;
    }
  }

  for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
                                         e = FnsToInstrument.end();
       i != e; ++i) {
    if (!*i || (*i)->isDeclaration())
      continue;

    removeUnreachableBlocks(**i);

    DFSanFunction DFSF(*this, *i, FnsWithNativeABI.count(*i));

    // DFSanVisitor may create new basic blocks, which confuses df_iterator.
    // Build a copy of the list before iterating over it.
    llvm::SmallVector<BasicBlock *, 4> BBList(
        depth_first(&(*i)->getEntryBlock()));

    for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(),
                                                      e = BBList.end();
         i != e; ++i) {
      Instruction *Inst = &(*i)->front();
      while (1) {
        // DFSanVisitor may split the current basic block, changing the current
        // instruction's next pointer and moving the next instruction to the
        // tail block from which we should continue.
        Instruction *Next = Inst->getNextNode();
        // DFSanVisitor may delete Inst, so keep track of whether it was a
        // terminator.
        bool IsTerminator = isa<TerminatorInst>(Inst);
        if (!DFSF.SkipInsts.count(Inst))
          DFSanVisitor(DFSF).visit(Inst);
        if (IsTerminator)
          break;
        Inst = Next;
      }
    }

    // We will not necessarily be able to compute the shadow for every phi node
    // until we have visited every block. Therefore, the code that handles phi
    // nodes adds them to the PHIFixups list so that they can be properly
    // handled here.
    for (std::vector<std::pair<PHINode *, PHINode *> >::iterator
             i = DFSF.PHIFixups.begin(),
             e = DFSF.PHIFixups.end();
         i != e; ++i) {
      for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n;
           ++val) {
        i->second->setIncomingValue(
            val, DFSF.getShadow(i->first->getIncomingValue(val)));
      }
    }

    // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
    // places (i.e. instructions in basic blocks we haven't even begun visiting
    // yet). To make our life easier, do this work in a pass after the main
    // instrumentation.
    if (ClDebugNonzeroLabels) {
      for (Value *V : DFSF.NonZeroChecks) {
        Instruction *Pos;
        if (Instruction *I = dyn_cast<Instruction>(V))
          Pos = I->getNextNode();
        else
          Pos = DFSF.F->getEntryBlock().begin();
        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
          Pos = Pos->getNextNode();
        IRBuilder<> IRB(Pos);
        Value *Ne = IRB.CreateICmpNE(V, DFSF.DFS.ZeroShadow);
        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
        IRBuilder<> ThenIRB(BI);
        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn);
      }
    }
  }

  return false;
}

// Returns the pointer through which this function reaches the argument-label
// area, computing it at most once and caching it in ArgTLSPtr. When the pass
// has a DFS.ArgTLS value it is used directly; otherwise a call to
// DFS.GetArgTLS is emitted at the start of the entry block and its result is
// used.
Value *DFSanFunction::getArgTLSPtr() {
  if (ArgTLSPtr)
    return ArgTLSPtr;
  if (DFS.ArgTLS)
    return ArgTLSPtr = DFS.ArgTLS;

  IRBuilder<> IRB(F->getEntryBlock().begin());
  return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS);
}

// Returns the pointer through which this function reaches the return-value
// label slot, computing it at most once and caching it in RetvalTLSPtr. When
// the pass has a DFS.RetvalTLS value it is used directly; otherwise a call to
// DFS.GetRetvalTLS is emitted at the start of the entry block.
Value *DFSanFunction::getRetvalTLS() {
  if (RetvalTLSPtr)
    return RetvalTLSPtr;
  if (DFS.RetvalTLS)
    return RetvalTLSPtr = DFS.RetvalTLS;

  IRBuilder<> IRB(F->getEntryBlock().begin());
  return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS);
}

// Emits, before Pos, a GEP with indices (0, Idx) on the pointer returned by
// getArgTLSPtr() and returns it, i.e. the address of the Idx'th argument-label
// slot.
Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
  IRBuilder<> IRB(Pos);
  return IRB.CreateConstGEP2_64(getArgTLSPtr(), 0, Idx);
}

// Returns the shadow (label) value associated with V.
//
// Values that are neither arguments nor instructions always map to
// DFS.ZeroShadow. Other values are looked up in ValShadowMap; on a miss:
//  - an instruction is given DFS.ZeroShadow, which is then cached;
//  - an argument of a native-ABI function yields DFS.ZeroShadow (the map
//    entry is left null in this case, so nothing is cached);
//  - otherwise the argument's shadow is materialised according to IA, cached,
//    and appended to NonZeroChecks.
Value *DFSanFunction::getShadow(Value *V) {
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.ZeroShadow;
  // Reference into the map: assignments to Shadow below populate the cache.
  Value *&Shadow = ValShadowMap[V];
  if (!Shadow) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      if (IsNativeABI)
        return DFS.ZeroShadow;
      switch (IA) {
      case DataFlowSanitizer::IA_TLS: {
        // Load the label from the argument's slot. The load is placed at the
        // start of the entry block, or right after the instruction that
        // produced ArgTLSPtr when that pointer is not DFS.ArgTLS.
        Value *ArgTLSPtr = getArgTLSPtr();
        Instruction *ArgTLSPos =
            DFS.ArgTLS ? &*F->getEntryBlock().begin()
                       : cast<Instruction>(ArgTLSPtr)->getNextNode();
        IRBuilder<> IRB(ArgTLSPos);
        Shadow = IRB.CreateLoad(getArgTLS(A->getArgNo(), ArgTLSPos));
        break;
      }
      case DataFlowSanitizer::IA_Args: {
        // The shadow is itself a formal parameter: the one located half the
        // argument list past the argument being queried.
        unsigned ArgIdx = A->getArgNo() + F->getArgumentList().size() / 2;
        Function::arg_iterator i = F->arg_begin();
        while (ArgIdx--)
          ++i;
        Shadow = i;
        assert(Shadow->getType() == DFS.ShadowTy);
        break;
      }
      }
      NonZeroChecks.push_back(Shadow);
    } else {
      Shadow = DFS.ZeroShadow;
    }
  }
  return Shadow;
}

// Records Shadow as the shadow of instruction I. Asserts that I has no map
// entry yet and that Shadow has type DFS.ShadowTy.
void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
  assert(!ValShadowMap.count(I));
  assert(Shadow->getType() == DFS.ShadowTy);
  ValShadowMap[I] = Shadow;
}

// Emits, before Pos, the computation of the shadow address for Addr:
//   inttoptr((ptrtoint(Addr) & ShadowPtrMask) * ShadowPtrMul) to ShadowPtrTy
// and returns the resulting pointer value. Asserts that Addr is not RetvalTLS.
Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
  assert(Addr != RetvalTLS && "Reinstrumenting?");
  IRBuilder<> IRB(Pos);
  return IRB.CreateIntToPtr(
      IRB.CreateMul(
          IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask),
          ShadowPtrMul),
      ShadowPtrTy);
}

// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. Returns the computed union Value.
+Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { + if (V1 == DFS.ZeroShadow) + return V2; + if (V2 == DFS.ZeroShadow) + return V1; + if (V1 == V2) + return V1; + + auto V1Elems = ShadowElements.find(V1); + auto V2Elems = ShadowElements.find(V2); + if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) { + if (std::includes(V1Elems->second.begin(), V1Elems->second.end(), + V2Elems->second.begin(), V2Elems->second.end())) { + return V1; + } else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(), + V1Elems->second.begin(), V1Elems->second.end())) { + return V2; + } + } else if (V1Elems != ShadowElements.end()) { + if (V1Elems->second.count(V2)) + return V1; + } else if (V2Elems != ShadowElements.end()) { + if (V2Elems->second.count(V1)) + return V2; + } + + auto Key = std::make_pair(V1, V2); + if (V1 > V2) + std::swap(Key.first, Key.second); + CachedCombinedShadow &CCS = CachedCombinedShadows[Key]; + if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent())) + return CCS.Shadow; + + IRBuilder<> IRB(Pos); + if (AvoidNewBlocks) { + CallInst *Call = IRB.CreateCall2(DFS.DFSanCheckedUnionFn, V1, V2); + Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + Call->addAttribute(1, Attribute::ZExt); + Call->addAttribute(2, Attribute::ZExt); + + CCS.Block = Pos->getParent(); + CCS.Shadow = Call; + } else { + BasicBlock *Head = Pos->getParent(); + Value *Ne = IRB.CreateICmpNE(V1, V2); + BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen( + Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT)); + IRBuilder<> ThenIRB(BI); + CallInst *Call = ThenIRB.CreateCall2(DFS.DFSanUnionFn, V1, V2); + Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + Call->addAttribute(1, Attribute::ZExt); + Call->addAttribute(2, Attribute::ZExt); + + BasicBlock *Tail = BI->getSuccessor(0); + PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", Tail->begin()); + Phi->addIncoming(Call, Call->getParent()); + 
Phi->addIncoming(V1, Head); + + CCS.Block = Tail; + CCS.Shadow = Phi; + } + + std::set<Value *> UnionElems; + if (V1Elems != ShadowElements.end()) { + UnionElems = V1Elems->second; + } else { + UnionElems.insert(V1); + } + if (V2Elems != ShadowElements.end()) { + UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end()); + } else { + UnionElems.insert(V2); + } + ShadowElements[CCS.Shadow] = std::move(UnionElems); + + return CCS.Shadow; +} + +// A convenience function which folds the shadows of each of the operands +// of the provided instruction Inst, inserting the IR before Inst. Returns +// the computed union Value. +Value *DFSanFunction::combineOperandShadows(Instruction *Inst) { + if (Inst->getNumOperands() == 0) + return DFS.ZeroShadow; + + Value *Shadow = getShadow(Inst->getOperand(0)); + for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) { + Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst); + } + return Shadow; +} + +void DFSanVisitor::visitOperandShadowInst(Instruction &I) { + Value *CombinedShadow = DFSF.combineOperandShadows(&I); + DFSF.setShadow(&I, CombinedShadow); +} + +// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where +// Addr has alignment Align, and take the union of each of those shadows. 
+Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, + Instruction *Pos) { + if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { + llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i = + AllocaShadowMap.find(AI); + if (i != AllocaShadowMap.end()) { + IRBuilder<> IRB(Pos); + return IRB.CreateLoad(i->second); + } + } + + uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8; + SmallVector<Value *, 2> Objs; + GetUnderlyingObjects(Addr, Objs, DFS.DL); + bool AllConstants = true; + for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end(); + i != e; ++i) { + if (isa<Function>(*i) || isa<BlockAddress>(*i)) + continue; + if (isa<GlobalVariable>(*i) && cast<GlobalVariable>(*i)->isConstant()) + continue; + + AllConstants = false; + break; + } + if (AllConstants) + return DFS.ZeroShadow; + + Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); + switch (Size) { + case 0: + return DFS.ZeroShadow; + case 1: { + LoadInst *LI = new LoadInst(ShadowAddr, "", Pos); + LI->setAlignment(ShadowAlign); + return LI; + } + case 2: { + IRBuilder<> IRB(Pos); + Value *ShadowAddr1 = + IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1)); + return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign), + IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos); + } + } + if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) { + // Fast path for the common case where each byte has identical shadow: load + // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any + // shadow is non-equal. + BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F); + IRBuilder<> FallbackIRB(FallbackBB); + CallInst *FallbackCall = FallbackIRB.CreateCall2( + DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)); + FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + + // Compare each of the shadows stored in the loaded 64 bits to each other, + // by computing (WideShadow rotl ShadowWidth) == WideShadow. 
+ IRBuilder<> IRB(Pos); + Value *WideAddr = + IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx)); + Value *WideShadow = IRB.CreateAlignedLoad(WideAddr, ShadowAlign); + Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy); + Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth); + Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth); + Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow); + Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow); + + BasicBlock *Head = Pos->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(Pos); + + if (DomTreeNode *OldNode = DT.getNode(Head)) { + std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT.addNewBlock(Tail, Head); + for (auto Child : Children) + DT.changeImmediateDominator(Child, NewNode); + } + + // In the following code LastBr will refer to the previous basic block's + // conditional branch instruction, whose true successor is fixed up to point + // to the next block during the loop below or to the tail after the final + // iteration. 
+ BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq); + ReplaceInstWithInst(Head->getTerminator(), LastBr); + DT.addNewBlock(FallbackBB, Head); + + for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size; + Ofs += 64 / DFS.ShadowWidth) { + BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F); + DT.addNewBlock(NextBB, LastBr->getParent()); + IRBuilder<> NextIRB(NextBB); + WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1)); + Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign); + ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow); + LastBr->setSuccessor(0, NextBB); + LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB); + } + + LastBr->setSuccessor(0, Tail); + FallbackIRB.CreateBr(Tail); + PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); + Shadow->addIncoming(FallbackCall, FallbackBB); + Shadow->addIncoming(TruncShadow, LastBr->getParent()); + return Shadow; + } + + IRBuilder<> IRB(Pos); + CallInst *FallbackCall = IRB.CreateCall2( + DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)); + FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + return FallbackCall; +} + +void DFSanVisitor::visitLoadInst(LoadInst &LI) { + uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType()); + if (Size == 0) { + DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow); + return; + } + + uint64_t Align; + if (ClPreserveAlignment) { + Align = LI.getAlignment(); + if (Align == 0) + Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType()); + } else { + Align = 1; + } + IRBuilder<> IRB(&LI); + Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI); + if (ClCombinePointerLabelsOnLoad) { + Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); + Shadow = DFSF.combineShadows(Shadow, PtrShadow, &LI); + } + if (Shadow != DFSF.DFS.ZeroShadow) + DFSF.NonZeroChecks.push_back(Shadow); + + DFSF.setShadow(&LI, Shadow); +} + +void 
DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, + Value *Shadow, Instruction *Pos) { + if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { + llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i = + AllocaShadowMap.find(AI); + if (i != AllocaShadowMap.end()) { + IRBuilder<> IRB(Pos); + IRB.CreateStore(Shadow, i->second); + return; + } + } + + uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8; + IRBuilder<> IRB(Pos); + Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); + if (Shadow == DFS.ZeroShadow) { + IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth); + Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0); + Value *ExtShadowAddr = + IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy)); + IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign); + return; + } + + const unsigned ShadowVecSize = 128 / DFS.ShadowWidth; + uint64_t Offset = 0; + if (Size >= ShadowVecSize) { + VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize); + Value *ShadowVec = UndefValue::get(ShadowVecTy); + for (unsigned i = 0; i != ShadowVecSize; ++i) { + ShadowVec = IRB.CreateInsertElement( + ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i)); + } + Value *ShadowVecAddr = + IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy)); + do { + Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset); + IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign); + Size -= ShadowVecSize; + ++Offset; + } while (Size >= ShadowVecSize); + Offset *= ShadowVecSize; + } + while (Size > 0) { + Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset); + IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign); + --Size; + ++Offset; + } +} + +void DFSanVisitor::visitStoreInst(StoreInst &SI) { + uint64_t Size = + DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType()); + if (Size == 0) + return; + + uint64_t Align; + if (ClPreserveAlignment) { + Align = 
SI.getAlignment(); + if (Align == 0) + Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType()); + } else { + Align = 1; + } + + Value* Shadow = DFSF.getShadow(SI.getValueOperand()); + if (ClCombinePointerLabelsOnStore) { + Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand()); + Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI); + } + DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI); +} + +void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) { + visitOperandShadowInst(BO); +} + +void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); } + +void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); } + +void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { + visitOperandShadowInst(GEPI); +} + +void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitAllocaInst(AllocaInst &I) { + bool AllLoadsStores = true; + for (User *U : I.users()) { + if (isa<LoadInst>(U)) + continue; + + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getPointerOperand() == &I) + continue; + } + + AllLoadsStores = false; + break; + } + if (AllLoadsStores) { + IRBuilder<> IRB(&I); + DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy); + } + DFSF.setShadow(&I, DFSF.DFS.ZeroShadow); +} + +void DFSanVisitor::visitSelectInst(SelectInst &I) { + Value *CondShadow = DFSF.getShadow(I.getCondition()); + Value *TrueShadow = DFSF.getShadow(I.getTrueValue()); + Value *FalseShadow = DFSF.getShadow(I.getFalseValue()); + + if 
(isa<VectorType>(I.getCondition()->getType())) { + DFSF.setShadow( + &I, + DFSF.combineShadows( + CondShadow, DFSF.combineShadows(TrueShadow, FalseShadow, &I), &I)); + } else { + Value *ShadowSel; + if (TrueShadow == FalseShadow) { + ShadowSel = TrueShadow; + } else { + ShadowSel = + SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I); + } + DFSF.setShadow(&I, DFSF.combineShadows(CondShadow, ShadowSel, &I)); + } +} + +void DFSanVisitor::visitMemSetInst(MemSetInst &I) { + IRBuilder<> IRB(&I); + Value *ValShadow = DFSF.getShadow(I.getValue()); + IRB.CreateCall3( + DFSF.DFS.DFSanSetLabelFn, ValShadow, + IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)), + IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)); +} + +void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { + IRBuilder<> IRB(&I); + Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); + Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I); + Value *LenShadow = IRB.CreateMul( + I.getLength(), + ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8)); + Value *AlignShadow; + if (ClPreserveAlignment) { + AlignShadow = IRB.CreateMul(I.getAlignmentCst(), + ConstantInt::get(I.getAlignmentCst()->getType(), + DFSF.DFS.ShadowWidth / 8)); + } else { + AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(), + DFSF.DFS.ShadowWidth / 8); + } + Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx); + DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr); + SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); + IRB.CreateCall5(I.getCalledValue(), DestShadow, SrcShadow, LenShadow, + AlignShadow, I.getVolatileCst()); +} + +void DFSanVisitor::visitReturnInst(ReturnInst &RI) { + if (!DFSF.IsNativeABI && RI.getReturnValue()) { + switch (DFSF.IA) { + case DataFlowSanitizer::IA_TLS: { + Value *S = DFSF.getShadow(RI.getReturnValue()); + IRBuilder<> IRB(&RI); + IRB.CreateStore(S, DFSF.getRetvalTLS()); + break; + } + case DataFlowSanitizer::IA_Args: { + 
IRBuilder<> IRB(&RI); + Type *RT = DFSF.F->getFunctionType()->getReturnType(); + Value *InsVal = + IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0); + Value *InsShadow = + IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1); + RI.setOperand(0, InsShadow); + break; + } + } + } +} + +void DFSanVisitor::visitCallSite(CallSite CS) { + Function *F = CS.getCalledFunction(); + if ((F && F->isIntrinsic()) || isa<InlineAsm>(CS.getCalledValue())) { + visitOperandShadowInst(*CS.getInstruction()); + return; + } + + // Calls to this function are synthesized in wrappers, and we shouldn't + // instrument them. + if (F == DFSF.DFS.DFSanVarargWrapperFn) + return; + + assert(!(cast<FunctionType>( + CS.getCalledValue()->getType()->getPointerElementType())->isVarArg() && + dyn_cast<InvokeInst>(CS.getInstruction()))); + + IRBuilder<> IRB(CS.getInstruction()); + + DenseMap<Value *, Function *>::iterator i = + DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue()); + if (i != DFSF.DFS.UnwrappedFnMap.end()) { + Function *F = i->second; + switch (DFSF.DFS.getWrapperKind(F)) { + case DataFlowSanitizer::WK_Warning: { + CS.setCalledFunction(F); + IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, + IRB.CreateGlobalStringPtr(F->getName())); + DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + return; + } + case DataFlowSanitizer::WK_Discard: { + CS.setCalledFunction(F); + DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + return; + } + case DataFlowSanitizer::WK_Functional: { + CS.setCalledFunction(F); + visitOperandShadowInst(*CS.getInstruction()); + return; + } + case DataFlowSanitizer::WK_Custom: { + // Don't try to handle invokes of custom functions, it's too complicated. + // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_ + // wrapper. 
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { + FunctionType *FT = F->getFunctionType(); + FunctionType *CustomFT = DFSF.DFS.getCustomFunctionType(FT); + std::string CustomFName = "__dfsw_"; + CustomFName += F->getName(); + Constant *CustomF = + DFSF.DFS.Mod->getOrInsertFunction(CustomFName, CustomFT); + if (Function *CustomFn = dyn_cast<Function>(CustomF)) { + CustomFn->copyAttributesFrom(F); + + // Custom functions returning non-void will write to the return label. + if (!FT->getReturnType()->isVoidTy()) { + CustomFn->removeAttributes(AttributeSet::FunctionIndex, + DFSF.DFS.ReadOnlyNoneAttrs); + } + } + + std::vector<Value *> Args; + + CallSite::arg_iterator i = CS.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) { + Type *T = (*i)->getType(); + FunctionType *ParamFT; + if (isa<PointerType>(T) && + (ParamFT = dyn_cast<FunctionType>( + cast<PointerType>(T)->getElementType()))) { + std::string TName = "dfst"; + TName += utostr(FT->getNumParams() - n); + TName += "$"; + TName += F->getName(); + Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName); + Args.push_back(T); + Args.push_back( + IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx))); + } else { + Args.push_back(*i); + } + } + + i = CS.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(DFSF.getShadow(*i)); + + if (FT->isVarArg()) { + auto LabelVAAlloca = + new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy, + CS.arg_size() - FT->getNumParams()), + "labelva", DFSF.F->getEntryBlock().begin()); + + for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { + auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n); + IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr); + } + + Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0)); + } + + if (!FT->getReturnType()->isVoidTy()) { + if (!DFSF.LabelReturnAlloca) { + DFSF.LabelReturnAlloca = + new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn", + DFSF.F->getEntryBlock().begin()); + } + 
Args.push_back(DFSF.LabelReturnAlloca); + } + + for (i = CS.arg_begin() + FT->getNumParams(); i != CS.arg_end(); ++i) + Args.push_back(*i); + + CallInst *CustomCI = IRB.CreateCall(CustomF, Args); + CustomCI->setCallingConv(CI->getCallingConv()); + CustomCI->setAttributes(CI->getAttributes()); + + if (!FT->getReturnType()->isVoidTy()) { + LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca); + DFSF.setShadow(CustomCI, LabelLoad); + } + + CI->replaceAllUsesWith(CustomCI); + CI->eraseFromParent(); + return; + } + break; + } + } + } + + FunctionType *FT = cast<FunctionType>( + CS.getCalledValue()->getType()->getPointerElementType()); + if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { + for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) { + IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)), + DFSF.getArgTLS(i, CS.getInstruction())); + } + } + + Instruction *Next = nullptr; + if (!CS.getType()->isVoidTy()) { + if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { + if (II->getNormalDest()->getSinglePredecessor()) { + Next = II->getNormalDest()->begin(); + } else { + BasicBlock *NewBB = + SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DFS); + Next = NewBB->begin(); + } + } else { + Next = CS->getNextNode(); + } + + if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { + IRBuilder<> NextIRB(Next); + LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS()); + DFSF.SkipInsts.insert(LI); + DFSF.setShadow(CS.getInstruction(), LI); + DFSF.NonZeroChecks.push_back(LI); + } + } + + // Do all instrumentation for IA_Args down here to defer tampering with the + // CFG in a way that SplitEdge may be able to detect. 
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) { + FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT); + Value *Func = + IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT)); + std::vector<Value *> Args; + + CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(*i); + + i = CS.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(DFSF.getShadow(*i)); + + if (FT->isVarArg()) { + unsigned VarArgSize = CS.arg_size() - FT->getNumParams(); + ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); + AllocaInst *VarArgShadow = + new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); + Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0)); + for (unsigned n = 0; i != e; ++i, ++n) { + IRB.CreateStore(DFSF.getShadow(*i), + IRB.CreateConstGEP2_32(VarArgShadow, 0, n)); + Args.push_back(*i); + } + } + + CallSite NewCS; + if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { + NewCS = IRB.CreateInvoke(Func, II->getNormalDest(), II->getUnwindDest(), + Args); + } else { + NewCS = IRB.CreateCall(Func, Args); + } + NewCS.setCallingConv(CS.getCallingConv()); + NewCS.setAttributes(CS.getAttributes().removeAttributes( + *DFSF.DFS.Ctx, AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType(), + AttributeSet::ReturnIndex))); + + if (Next) { + ExtractValueInst *ExVal = + ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next); + DFSF.SkipInsts.insert(ExVal); + ExtractValueInst *ExShadow = + ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next); + DFSF.SkipInsts.insert(ExShadow); + DFSF.setShadow(ExVal, ExShadow); + DFSF.NonZeroChecks.push_back(ExShadow); + + CS.getInstruction()->replaceAllUsesWith(ExVal); + } + + CS.getInstruction()->eraseFromParent(); + } +} + +void DFSanVisitor::visitPHINode(PHINode &PN) { + PHINode *ShadowPN = + 
PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN); + + // Give the shadow phi node valid predecessors to fool SplitEdge into working. + Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy); + for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e; + ++i) { + ShadowPN->addIncoming(UndefShadow, *i); + } + + DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN)); + DFSF.setShadow(&PN, ShadowPN); +} |