Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO')
28 files changed, 16633 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp new file mode 100644 index 0000000..0e05129 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -0,0 +1,1019 @@ +//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass promotes "by reference" arguments to be "by value" arguments. In +// practice, this means looking for internal functions that have pointer +// arguments. If it can prove, through the use of alias analysis, that an +// argument is *only* loaded, then it can pass the value into the function +// instead of the address of the value. This can cause recursive simplification +// of code and lead to the elimination of allocas (especially in C++ template +// code like the STL). +// +// This pass also handles aggregate arguments that are passed into a function, +// scalarizing them if the elements of the aggregate are only loaded. Note that +// by default it refuses to scalarize aggregates which would require passing in +// more than three operands to the function, because passing thousands of +// operands for a large array or structure is unprofitable! This limit can be +// configured or disabled, however. +// +// Note that this transformation could also be done for arguments that are only +// stored to (returning the value instead), but does not currently. This case +// would be best handled when and if LLVM begins supporting multiple return +// values from functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <set> +using namespace llvm; + +#define DEBUG_TYPE "argpromotion" + +STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted"); +STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); +STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted"); +STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated"); + +namespace { + /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. 
+  ///
+  struct ArgPromotion : public CallGraphSCCPass {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<AssumptionCacheTracker>();
+      AU.addRequired<TargetLibraryInfoWrapperPass>();
+      CallGraphSCCPass::getAnalysisUsage(AU);
+    }
+
+    bool runOnSCC(CallGraphSCC &SCC) override;
+    static char ID; // Pass identification, replacement for typeid
+    explicit ArgPromotion(unsigned maxElements = 3)
+        : CallGraphSCCPass(ID), maxElements(maxElements) {
+      initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+    }
+
+    /// A vector used to hold the indices of a single GEP instruction
+    typedef std::vector<uint64_t> IndicesVector;
+
+  private:
+    bool isDenselyPacked(Type *type, const DataLayout &DL);
+    bool canPaddingBeAccessed(Argument *Arg);
+    CallGraphNode *PromoteArguments(CallGraphNode *CGN);
+    bool isSafeToPromoteArgument(Argument *Arg, bool isByVal,
+                                 AAResults &AAR) const;
+    CallGraphNode *DoPromotion(Function *F,
+                               SmallPtrSetImpl<Argument*> &ArgsToPromote,
+                               SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
+
+    using llvm::Pass::doInitialization;
+    bool doInitialization(CallGraph &CG) override;
+    /// The maximum number of elements to expand, or 0 for unlimited.
+    unsigned maxElements;
+  };
+}
+
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+                      "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+                    "Promote 'by reference' arguments to scalars", false, false)
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+  return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+  bool Changed = false, LocalChange;
+
+  do { // Iterate until we stop promoting from this SCC.
+    LocalChange = false;
+    // Attempt to promote arguments from all functions in this SCC.
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+      if (CallGraphNode *CGN = PromoteArguments(*I)) {
+        LocalChange = true;
+        SCC.ReplaceNode(*I, CGN);
+      }
+    }
+    Changed |= LocalChange; // Remember that we changed something.
+  } while (LocalChange);
+
+  return Changed;
+}
+
+/// \brief Checks if a type could have padding bytes.
+bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
+
+  // There is no size information, so be conservative.
+  if (!type->isSized())
+    return false;
+
+  // If the alloc size is not equal to the storage size, then there are padding
+  // bytes. For example, x86_fp80 on x86-64 has size 80 but alloc size 128.
+  if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
+    return false;
+
+  if (!isa<CompositeType>(type))
+    return true;
+
+  // For homogeneous sequential types, check for padding within members.
+  if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+    return isa<PointerType>(seqTy) ||
+           isDenselyPacked(seqTy->getElementType(), DL);
+
+  // Check for padding within and between elements of a struct.
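+  // For example (illustrative, not from the upstream source): on a typical
+  // 64-bit target, struct { char c; int i; } allocates 8 bytes but only 5
+  // carry data, so it is not densely packed, whereas struct { int a; int b; }
+  // is.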
+ StructType *StructTy = cast<StructType>(type); + const StructLayout *Layout = DL.getStructLayout(StructTy); + uint64_t StartPos = 0; + for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { + Type *ElTy = StructTy->getElementType(i); + if (!isDenselyPacked(ElTy, DL)) + return false; + if (StartPos != Layout->getElementOffsetInBits(i)) + return false; + StartPos += DL.getTypeAllocSizeInBits(ElTy); + } + + return true; +} + +/// \brief Checks if the padding bytes of an argument could be accessed. +bool ArgPromotion::canPaddingBeAccessed(Argument *arg) { + + assert(arg->hasByValAttr()); + + // Track all the pointers to the argument to make sure they are not captured. + SmallPtrSet<Value *, 16> PtrValues; + PtrValues.insert(arg); + + // Track all of the stores. + SmallVector<StoreInst *, 16> Stores; + + // Scan through the uses recursively to make sure the pointer is always used + // sanely. + SmallVector<Value *, 16> WorkList; + WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) { + if (PtrValues.insert(V).second) + WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); + } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) { + Stores.push_back(Store); + } else if (!isa<LoadInst>(V)) { + return true; + } + } + +// Check to make sure the pointers aren't captured + for (StoreInst *Store : Stores) + if (PtrValues.count(Store->getValueOperand())) + return true; + + return false; +} + +/// PromoteArguments - This method checks the specified function to see if there +/// are any promotable arguments and if it is safe to promote the function (for +/// example, all callers are direct). If safe to promote some arguments, it +/// calls the DoPromotion method. +/// +CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { + Function *F = CGN->getFunction(); + + // Make sure that it is local to this module. + if (!F || !F->hasLocalLinkage()) return nullptr; + + // Don't promote arguments for variadic functions. Adding, removing, or + // changing non-pack parameters can change the classification of pack + // parameters. Frontends encode that classification at the call site in the + // IR, while in the callee the classification is determined dynamically based + // on the number of registers consumed so far. + if (F->isVarArg()) return nullptr; + + // First check: see if there are any pointer arguments! If not, quick exit. + SmallVector<Argument*, 16> PointerArgs; + for (Argument &I : F->args()) + if (I.getType()->isPointerTy()) + PointerArgs.push_back(&I); + if (PointerArgs.empty()) return nullptr; + + // Second check: make sure that all callers are direct callers. We can't + // transform functions that have indirect callers. Also see if the function + // is self-recursive. + bool isSelfRecursive = false; + for (Use &U : F->uses()) { + CallSite CS(U.getUser()); + // Must be a direct call. + if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; + + if (CS.getInstruction()->getParent()->getParent() == F) + isSelfRecursive = true; + } + + const DataLayout &DL = F->getParent()->getDataLayout(); + + // We need to manually construct BasicAA directly in order to disable its use + // of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. 
+  AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
+
+  // Check to see which arguments are promotable. If an argument is promotable,
+  // add it to ArgsToPromote.
+  SmallPtrSet<Argument*, 8> ArgsToPromote;
+  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
+    Argument *PtrArg = PointerArgs[i];
+    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+
+    // Replace sret attribute with noalias. This reduces register pressure by
+    // avoiding a register copy.
+    if (PtrArg->hasStructRetAttr()) {
+      unsigned ArgNo = PtrArg->getArgNo();
+      F->setAttributes(
+          F->getAttributes()
+              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      for (Use &U : F->uses()) {
+        CallSite CS(U.getUser());
+        CS.setAttributes(
+            CS.getAttributes()
+                .removeAttribute(F->getContext(), ArgNo + 1,
+                                 Attribute::StructRet)
+                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      }
+    }
+
+    // If this is a byval argument and the aggregate type is small, just pass
+    // the elements directly; this is always safe if the passed value is
+    // densely packed or if we can prove the padding bytes are never accessed.
+    // This does not apply to inalloca.
+    bool isSafeToPromote =
+        PtrArg->hasByValAttr() &&
+        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+    if (isSafeToPromote) {
+      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+        if (maxElements > 0 && STy->getNumElements() > maxElements) {
+          DEBUG(dbgs() << "argpromotion not promoting argument '"
+                << PtrArg->getName() << "' because it would require adding more"
+                << " than " << maxElements << " arguments to the function.\n");
+          continue;
+        }
+
+        // If all the elements are single-value types, we can promote it.
+        bool AllSimple = true;
+        for (const auto *EltTy : STy->elements()) {
+          if (!EltTy->isSingleValueType()) {
+            AllSimple = false;
+            break;
+          }
+        }
+
+        // Safe to transform, don't even bother trying to "promote" it.
+        // Passing the elements as scalars will allow scalarrepl to hack on
+        // the new alloca we introduce.
+        if (AllSimple) {
+          ByValArgsToTransform.insert(PtrArg);
+          continue;
+        }
+      }
+    }
+
+    // If the argument is a recursive type and we're in a recursive
+    // function, we could end up infinitely peeling the function argument.
+    if (isSelfRecursive) {
+      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+        bool RecursiveType = false;
+        for (const auto *EltTy : STy->elements()) {
+          if (EltTy == PtrArg->getType()) {
+            RecursiveType = true;
+            break;
+          }
+        }
+        if (RecursiveType)
+          continue;
+      }
+    }
+
+    // Otherwise, see if we can promote the pointer to its value.
+    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
+      ArgsToPromote.insert(PtrArg);
+  }
+
+  // No promotable pointer arguments.
+  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+    return nullptr;
+
+  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+}
+
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
+/// all callers pass in a valid pointer for the specified function argument.
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+  Function *Callee = Arg->getParent();
+  const DataLayout &DL = Callee->getParent()->getDataLayout();
+
+  unsigned ArgNo = Arg->getArgNo();
+
+  // Look at all call sites of the function. At this point we know we only
+  // have direct callsites.
+  for (User *U : Callee->users()) {
+    CallSite CS(U);
+    assert(CS && "Should only have direct calls!");
+
+    if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
+      return false;
+  }
+  return true;
+}
+
+/// Returns true if Prefix is a prefix of Longer. That is, Longer has at least
+/// as many elements as Prefix, and each element of Prefix is equal to the
+/// corresponding element of Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+                     const ArgPromotion::IndicesVector &Longer) {
+  if (Prefix.size() > Longer.size())
+    return false;
+  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
+}
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+                     std::set<ArgPromotion::IndicesVector> &Set) {
+  std::set<ArgPromotion::IndicesVector>::iterator Low;
+  Low = Set.upper_bound(Indices);
+  if (Low != Set.begin())
+    Low--;
+  // Low is now the last element smaller than or equal to Indices. This means
+  // it points to a prefix of Indices (possibly Indices itself), if such
+  // prefix exists.
+  //
+  // This load is safe if any prefix of its operands is safe to load.
+  return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of ToMark in Safe, ToMark is implicitly marked safe
+/// already. Furthermore, any indices that ToMark is itself a prefix of are
+/// removed from Safe (since they are now implicitly safe because of ToMark).
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+                            std::set<ArgPromotion::IndicesVector> &Safe) {
+  std::set<ArgPromotion::IndicesVector>::iterator Low;
+  Low = Safe.upper_bound(ToMark);
+  // Guard against the case where Safe is empty.
+  if (Low != Safe.begin())
+    Low--;
+  // Low is now the last element smaller than or equal to ToMark. This
+  // means it points to a prefix of ToMark (possibly ToMark itself), if
+  // such a prefix exists.
+  if (Low != Safe.end()) {
+    if (IsPrefix(*Low, ToMark))
+      // If there is already a prefix of these indices (or exactly these
+      // indices) marked as safe, don't bother adding these indices.
+      return;
+
+    // Increment Low, so we can use it as an "insert before" hint.
+    ++Low;
+  }
+  // Insert.
+  Low = Safe.insert(Low, ToMark);
+  ++Low;
+  // If ToMark is a prefix of longer index list(s), remove those.
+  std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+  while (Low != End && IsPrefix(ToMark, *Low)) {
+    std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+    ++Low;
+    Safe.erase(Remove);
+  }
+}
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to at most maxElements elements
+/// (three by default) in order to avoid exploding the number of arguments
+/// passed in.
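+///
+/// For example (illustrative IR, not from this file): a callee whose only use
+/// of its pointer argument is
+///   %v = load i32, i32* %p
+/// in the entry block can be rewritten to take the i32 value directly, with
+/// each caller performing the load instead.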
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, + bool isByValOrInAlloca, + AAResults &AAR) const { + typedef std::set<IndicesVector> GEPIndicesSet; + + // Quick exit for unused arguments + if (Arg->use_empty()) + return true; + + // We can only promote this argument if all of the uses are loads, or are GEP + // instructions (with constant indices) that are subsequently loaded. + // + // Promoting the argument causes it to be loaded in the caller + // unconditionally. This is only safe if we can prove that either the load + // would have happened in the callee anyway (ie, there is a load in the entry + // block) or the pointer passed in at every call site is guaranteed to be + // valid. + // In the former case, invalid loads can happen, but would have happened + // anyway, in the latter case, invalid loads won't happen. This prevents us + // from introducing an invalid load that wouldn't have happened in the + // original code. + // + // This set will contain all sets of indices that are loaded in the entry + // block, and thus are safe to unconditionally load in the caller. + // + // This optimization is also safe for InAlloca parameters, because it verifies + // that the address isn't captured. + GEPIndicesSet SafeToUnconditionallyLoad; + + // This set contains all the sets of indices that we are planning to promote. + // This makes it possible to limit the number of arguments added. + GEPIndicesSet ToPromote; + + // If the pointer is always valid, any load with first index 0 is valid. + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) + SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); + + // First, iterate the entry block and mark loads of (geps of) arguments as + // safe. + BasicBlock &EntryBlock = Arg->getParent()->front(); + // Declare this here so we can reuse it + IndicesVector Indices; + for (Instruction &I : EntryBlock) + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { + Value *V = LI->getPointerOperand(); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { + V = GEP->getPointerOperand(); + if (V == Arg) { + // This load actually loads (part of) Arg? Check the indices then. + Indices.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + if (ConstantInt *CI = dyn_cast<ConstantInt>(*II)) + Indices.push_back(CI->getSExtValue()); + else + // We found a non-constant GEP index for this argument? Bail out + // right away, can't promote this argument at all. + return false; + + // Indices checked out, mark them as safe + MarkIndicesSafe(Indices, SafeToUnconditionallyLoad); + Indices.clear(); + } + } else if (V == Arg) { + // Direct loads are equivalent to a GEP with a single 0 index. + MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); + } + } + + // Now, iterate all uses of the argument to see if there are any uses that are + // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. + SmallVector<LoadInst*, 16> Loads; + IndicesVector Operands; + for (Use &U : Arg->uses()) { + User *UR = U.getUser(); + Operands.clear(); + if (LoadInst *LI = dyn_cast<LoadInst>(UR)) { + // Don't hack volatile/atomic loads + if (!LI->isSimple()) return false; + Loads.push_back(LI); + // Direct loads are equivalent to a GEP with a zero index and then a load. + Operands.push_back(0); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) { + if (GEP->use_empty()) { + // Dead GEP's cause trouble later. Just remove them if we run into + // them. 
+ GEP->eraseFromParent(); + // TODO: This runs the above loop over and over again for dead GEPs + // Couldn't we just do increment the UI iterator earlier and erase the + // use? + return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR); + } + + // Ensure that all of the indices are constants. + for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); + i != e; ++i) + if (ConstantInt *C = dyn_cast<ConstantInt>(*i)) + Operands.push_back(C->getSExtValue()); + else + return false; // Not a constant operand GEP! + + // Ensure that the only users of the GEP are load instructions. + for (User *GEPU : GEP->users()) + if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) { + // Don't hack volatile/atomic loads + if (!LI->isSimple()) return false; + Loads.push_back(LI); + } else { + // Other uses than load? + return false; + } + } else { + return false; // Not a load or a GEP. + } + + // Now, see if it is safe to promote this load / loads of this GEP. Loading + // is safe if Operands, or a prefix of Operands, is marked as safe. + if (!PrefixIn(Operands, SafeToUnconditionallyLoad)) + return false; + + // See if we are already promoting a load with these indices. If not, check + // to make sure that we aren't promoting too many elements. If so, nothing + // to do. + if (ToPromote.find(Operands) == ToPromote.end()) { + if (maxElements > 0 && ToPromote.size() == maxElements) { + DEBUG(dbgs() << "argpromotion not promoting argument '" + << Arg->getName() << "' because it would require adding more " + << "than " << maxElements << " arguments to the function.\n"); + // We limit aggregate promotion to only promoting up to a fixed number + // of elements of the aggregate. + return false; + } + ToPromote.insert(std::move(Operands)); + } + } + + if (Loads.empty()) return true; // No users, this is a dead argument. + + // Okay, now we know that the argument is only used by load instructions and + // it is safe to unconditionally perform all of them. Use alias analysis to + // check to see if the pointer is guaranteed to not be modified from entry of + // the function to each of the load instructions. + + // Because there could be several/many load instructions, remember which + // blocks we know to be transparent to the load. + SmallPtrSet<BasicBlock*, 16> TranspBlocks; + + for (unsigned i = 0, e = Loads.size(); i != e; ++i) { + // Check to see if the load is invalidated from the start of the block to + // the load itself. + LoadInst *Load = Loads[i]; + BasicBlock *BB = Load->getParent(); + + MemoryLocation Loc = MemoryLocation::get(Load); + if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod)) + return false; // Pointer is invalidated! + + // Now check every path from the entry block to the load for transparency. + // To do this, we perform a depth first search on the inverse CFG from the + // loading block. + for (BasicBlock *P : predecessors(BB)) { + for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) + if (AAR.canBasicBlockModify(*TranspBB, Loc)) + return false; + } + } + + // If the path from the entry of the function to each load is free of + // instructions that potentially invalidate the load, we can make the + // transformation! + return true; +} + +/// DoPromotion - This method actually performs the promotion of the specified +/// arguments, and returns the new function. At this point, we know that it's +/// safe to do so. 
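+///
+/// Concretely (as implemented below): byval aggregates being transformed are
+/// split into one argument per struct element, dead pointer arguments are
+/// simply dropped, and each promoted pointer argument becomes one scalar
+/// argument per distinct set of constant GEP indices loaded from it.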
+CallGraphNode *ArgPromotion::DoPromotion(Function *F, + SmallPtrSetImpl<Argument*> &ArgsToPromote, + SmallPtrSetImpl<Argument*> &ByValArgsToTransform) { + + // Start by computing a new prototype for the function, which is the same as + // the old function, but has modified arguments. + FunctionType *FTy = F->getFunctionType(); + std::vector<Type*> Params; + + typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable; + + // ScalarizedElements - If we are promoting a pointer that has elements + // accessed out of it, keep track of which elements are accessed so that we + // can add one argument for each. + // + // Arguments that are directly loaded will have a zero element value here, to + // handle cases where there are both a direct load and GEP accesses. + // + std::map<Argument*, ScalarizeTable> ScalarizedElements; + + // OriginalLoads - Keep track of a representative load instruction from the + // original function so that we can tell the alias analysis implementation + // what the new GEP/Load instructions we are inserting look like. + // We need to keep the original loads for each argument and the elements + // of the argument that are accessed. + std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads; + + // Attribute - Keep track of the parameter attributes for the arguments + // that we are *not* promoting. For the ones that we do promote, the parameter + // attributes are lost + SmallVector<AttributeSet, 8> AttributesVec; + const AttributeSet &PAL = F->getAttributes(); + + // Add any return attributes. + if (PAL.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeSet::get(F->getContext(), + PAL.getRetAttributes())); + + // First, determine the new argument list + unsigned ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; + ++I, ++ArgIndex) { + if (ByValArgsToTransform.count(&*I)) { + // Simple byval argument? Just add all the struct element types. + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + StructType *STy = cast<StructType>(AgTy); + Params.insert(Params.end(), STy->element_begin(), STy->element_end()); + ++NumByValArgsPromoted; + } else if (!ArgsToPromote.count(&*I)) { + // Unchanged argument + Params.push_back(I->getType()); + AttributeSet attrs = PAL.getParamAttributes(ArgIndex); + if (attrs.hasAttributes(ArgIndex)) { + AttrBuilder B(attrs, ArgIndex); + AttributesVec. + push_back(AttributeSet::get(F->getContext(), Params.size(), B)); + } + } else if (I->use_empty()) { + // Dead argument (which are always marked as promotable) + ++NumArgumentsDead; + } else { + // Okay, this is being promoted. This means that the only uses are loads + // or GEPs which are only used by loads + + // In this table, we will track which indices are loaded from the argument + // (where direct loads are tracked as no indices). + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + for (User *U : I->users()) { + Instruction *UI = cast<Instruction>(U); + Type *SrcTy; + if (LoadInst *L = dyn_cast<LoadInst>(UI)) + SrcTy = L->getType(); + else + SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); + IndicesVector Indices; + Indices.reserve(UI->getNumOperands() - 1); + // Since loads will only have a single operand, and GEPs only a single + // non-index operand, this will record direct loads without any indices, + // and gep+loads with the GEP indices. 
+ for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); + II != IE; ++II) + Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); + // GEPs with a single 0 index can be merged with direct loads + if (Indices.size() == 1 && Indices.front() == 0) + Indices.clear(); + ArgIndices.insert(std::make_pair(SrcTy, Indices)); + LoadInst *OrigLoad; + if (LoadInst *L = dyn_cast<LoadInst>(UI)) + OrigLoad = L; + else + // Take any load, we will use it only to update Alias Analysis + OrigLoad = cast<LoadInst>(UI->user_back()); + OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; + } + + // Add a parameter to the function for each element passed in. + for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) { + // not allowed to dereference ->begin() if size() is 0 + Params.push_back(GetElementPtrInst::getIndexedType( + cast<PointerType>(I->getType()->getScalarType())->getElementType(), + SI->second)); + assert(Params.back()); + } + + if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) + ++NumArgumentsPromoted; + else + ++NumAggregatesPromoted; + } + } + + // Add any function attributes. + if (PAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeSet::get(FTy->getContext(), + PAL.getFnAttributes())); + + Type *RetTy = FTy->getReturnType(); + + // Construct the new function type using the new arguments. + FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); + + // Create the new function body and insert it into the module. + Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); + NF->copyAttributesFrom(F); + + // Patch the pointer to LLVM function in debug info descriptor. + NF->setSubprogram(F->getSubprogram()); + F->setSubprogram(nullptr); + + DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" + << "From: " << *F); + + // Recompute the parameter attributes list based on the new arguments for + // the function. + NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec)); + AttributesVec.clear(); + + F->getParent()->getFunctionList().insert(F->getIterator(), NF); + NF->takeName(F); + + // Get the callgraph information that we need to update to reflect our + // changes. + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + + // Get a new callgraph node for NF. + CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in the loaded pointers. + // + SmallVector<Value*, 16> Args; + while (!F->use_empty()) { + CallSite CS(F->user_back()); + assert(CS.getCalledFunction() == F); + Instruction *Call = CS.getInstruction(); + const AttributeSet &CallPAL = CS.getAttributes(); + + // Add any return attributes. + if (CallPAL.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeSet::get(F->getContext(), + CallPAL.getRetAttributes())); + + // Loop over the operands, inserting GEP and loads in the caller as + // appropriate. + CallSite::arg_iterator AI = CS.arg_begin(); + ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++AI, ++ArgIndex) + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + Args.push_back(*AI); // Unmodified argument + + if (CallPAL.hasAttributes(ArgIndex)) { + AttrBuilder B(CallPAL, ArgIndex); + AttributesVec. 
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B)); + } + } else if (ByValArgsToTransform.count(&*I)) { + // Emit a GEP and load for each element of the struct. + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + StructType *STy = cast<StructType>(AgTy); + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = GetElementPtrInst::Create( + STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); + // TODO: Tell AA about the new values? + Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); + } + } else if (!I->use_empty()) { + // Non-dead argument: insert GEPs and loads as appropriate. + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + // Store the Value* version of the indices in here, but declare it now + // for reuse. + std::vector<Value*> Ops; + for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) { + Value *V = *AI; + LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)]; + if (!SI->second.empty()) { + Ops.reserve(SI->second.size()); + Type *ElTy = V->getType(); + for (IndicesVector::const_iterator II = SI->second.begin(), + IE = SI->second.end(); + II != IE; ++II) { + // Use i32 to index structs, and i64 for others (pointers/arrays). + // This satisfies GEP constraints. + Type *IdxTy = (ElTy->isStructTy() ? + Type::getInt32Ty(F->getContext()) : + Type::getInt64Ty(F->getContext())); + Ops.push_back(ConstantInt::get(IdxTy, *II)); + // Keep track of the type we're currently indexing. + ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); + } + // And create a GEP to extract those indices. + V = GetElementPtrInst::Create(SI->first, V, Ops, + V->getName() + ".idx", Call); + Ops.clear(); + } + // Since we're replacing a load make sure we take the alignment + // of the previous load. + LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); + newLoad->setAlignment(OrigLoad->getAlignment()); + // Transfer the AA info too. + AAMDNodes AAInfo; + OrigLoad->getAAMetadata(AAInfo); + newLoad->setAAMetadata(AAInfo); + + Args.push_back(newLoad); + } + } + + // Push any varargs arguments on the list. + for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + Args.push_back(*AI); + if (CallPAL.hasAttributes(ArgIndex)) { + AttrBuilder B(CallPAL, ArgIndex); + AttributesVec. + push_back(AttributeSet::get(F->getContext(), Args.size(), B)); + } + } + + // Add any function attributes. + if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeSet::get(Call->getContext(), + CallPAL.getFnAttributes())); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(), + AttributesVec)); + } else { + New = CallInst::Create(NF, Args, "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(), + AttributesVec)); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + New->setDebugLoc(Call->getDebugLoc()); + Args.clear(); + AttributesVec.clear(); + + // Update the callgraph to know that the callsite has been transformed. 
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); + + if (!Call->use_empty()) { + Call->replaceAllUsesWith(New); + New->takeName(Call); + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. + // + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + // If this is an unmodified argument, move the name and users over to the + // new version. + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); + ++I2; + continue; + } + + if (ByValArgsToTransform.count(&*I)) { + // In the callee, we create an alloca, and store each of the new incoming + // arguments into the alloca. + Instruction *InsertPt = &NF->begin()->front(); + + // Just add all the struct element types. + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt); + StructType *STy = cast<StructType>(AgTy); + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; + + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = GetElementPtrInst::Create( + AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), + InsertPt); + I2->setName(I->getName()+"."+Twine(i)); + new StoreInst(&*I2++, Idx, InsertPt); + } + + // Anything that used the arg should now use the alloca. + I->replaceAllUsesWith(TheAlloca); + TheAlloca->takeName(&*I); + + // If the alloca is used in a call, we must clear the tail flag since + // the callee now uses an alloca from the caller. + for (User *U : TheAlloca->users()) { + CallInst *Call = dyn_cast<CallInst>(U); + if (!Call) + continue; + Call->setTailCall(false); + } + continue; + } + + if (I->use_empty()) + continue; + + // Otherwise, if we promoted this argument, then all users are load + // instructions (or GEPs with only load users), and all loads should be + // using the new argument that we added. 
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + + while (!I->use_empty()) { + if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { + assert(ArgIndices.begin()->second.empty() && + "Load element should sort to front!"); + I2->setName(I->getName()+".val"); + LI->replaceAllUsesWith(&*I2); + LI->eraseFromParent(); + DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() + << "' in function '" << F->getName() << "'\n"); + } else { + GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); + IndicesVector Operands; + Operands.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); + + // GEPs with a single 0 index can be merged with direct loads + if (Operands.size() == 1 && Operands.front() == 0) + Operands.clear(); + + Function::arg_iterator TheArg = I2; + for (ScalarizeTable::iterator It = ArgIndices.begin(); + It->second != Operands; ++It, ++TheArg) { + assert(It != ArgIndices.end() && "GEP not handled??"); + } + + std::string NewName = I->getName(); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + NewName += "." + utostr(Operands[i]); + } + NewName += ".val"; + TheArg->setName(NewName); + + DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() + << "' of function '" << NF->getName() << "'\n"); + + // All of the uses must be load instructions. Replace them all with + // the argument specified by ArgNo. + while (!GEP->use_empty()) { + LoadInst *L = cast<LoadInst>(GEP->user_back()); + L->replaceAllUsesWith(&*TheArg); + L->eraseFromParent(); + } + GEP->eraseFromParent(); + } + } + + // Increment I2 past all of the arguments added for this promoted pointer. + std::advance(I2, ArgIndices.size()); + } + + NF_CGN->stealCalledFunctionsFrom(CG[F]); + + // Now that the old function is dead, delete it. If there is a dangling + // reference to the CallgraphNode, just leave the dead function around for + // someone else to nuke. + CallGraphNode *CGN = CG[F]; + if (CGN->getNumReferences() == 0) + delete CG.removeFunctionFromModule(CGN); + else + F->setLinkage(Function::ExternalLinkage); + + return NF_CGN; +} + +bool ArgPromotion::doInitialization(CallGraph &CG) { + return CallGraphSCCPass::doInitialization(CG); +} diff --git a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp new file mode 100644 index 0000000..6af1043 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp @@ -0,0 +1,47 @@ +//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// NOTE: DO NOT USE THIS IF AVOIDABLE +// +// This pass is a nonce pass intended to allow manipulation of the implicitly +// nesting pass manager. For example, it can be used to cause a CGSCC pass +// manager to be closed prior to running a new collection of function passes. +// +// FIXME: This is a huge HACK. This should be removed when the pass manager's +// nesting is made explicit instead of implicit. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" +using namespace llvm; + +namespace { +/// \brief A nonce module pass used to place a barrier in a pass manager. +/// +/// There is no mechanism for ending a CGSCC pass manager once one is started. +/// This prevents extension points from having clear deterministic ordering +/// when they are phrased as non-module passes. +class BarrierNoop : public ModulePass { +public: + static char ID; // Pass identification. + + BarrierNoop() : ModulePass(ID) { + initializeBarrierNoopPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { return false; } +}; +} + +ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); } + +char BarrierNoop::ID = 0; +INITIALIZE_PASS(BarrierNoop, "barrier", "A No-Op Barrier Pass", + false, false) diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp new file mode 100644 index 0000000..0aa49d6 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -0,0 +1,222 @@ +//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to a pass that merges duplicate global +// constants together into a single constant that is shared. This is useful +// because some passes (ie TraceValues) insert a lot of string constants into +// the program, regardless of whether or not an existing string is available. +// +// Algorithm: ConstantMerge is designed to build up a map of available constants +// and eliminate duplicates when it is initialized. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "constmerge" + +STATISTIC(NumMerged, "Number of global constants merged"); + +namespace { + struct ConstantMerge : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ConstantMerge() : ModulePass(ID) { + initializeConstantMergePass(*PassRegistry::getPassRegistry()); + } + + // For this pass, process all of the globals in the module, eliminating + // duplicate constants. + bool runOnModule(Module &M) override; + + // Return true iff we can determine the alignment of this global variable. + bool hasKnownAlignment(GlobalVariable *GV) const; + + // Return the alignment of the global, including converting the default + // alignment to a concrete value. + unsigned getAlignment(GlobalVariable *GV) const; + + }; +} + +char ConstantMerge::ID = 0; +INITIALIZE_PASS(ConstantMerge, "constmerge", + "Merge Duplicate Global Constants", false, false) + +ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } + + + +/// Find values that are marked as llvm.used. 
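+/// The llvm.used variable holds an array of pointers to the used globals,
+/// e.g. (illustrative):
+///   @llvm.used = appending global [1 x i8*]
+///                [i8* bitcast (i32* @g to i8*)], section "llvm.metadata"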
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+                           SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
+  if (!LLVMUsed) return;
+  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
+  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+    Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+    GlobalValue *GV = cast<GlobalValue>(Operand);
+    UsedValues.insert(GV);
+  }
+}
+
+// Returns true if A is a better canonical choice than B.
+static bool IsBetterCanonical(const GlobalVariable &A,
+                              const GlobalVariable &B) {
+  if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+    return true;
+
+  if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+    return false;
+
+  return A.hasUnnamedAddr();
+}
+
+unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+  unsigned Align = GV->getAlignment();
+  if (Align)
+    return Align;
+  return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
+}
+
+bool ConstantMerge::runOnModule(Module &M) {
+
+  // Find all the globals that are marked "used". These cannot be merged.
+  SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+  FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+  FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
+  // Map unique constants to globals.
+  DenseMap<Constant *, GlobalVariable *> CMap;
+
+  // Replacements - This vector contains a list of replacements to perform.
+  SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+
+  bool MadeChange = false;
+
+  // Iterate constant merging while we are still making progress. Merging two
+  // constants together may allow us to merge other constants together if the
+  // second level constants have initializers which point to the globals that
+  // were just merged.
+  while (1) {
+
+    // First: Find the canonical constants others will be merged with.
+    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+         GVI != E; ) {
+      GlobalVariable *GV = &*GVI++;
+
+      // If this GV is dead, remove it.
+      GV->removeDeadConstantUsers();
+      if (GV->use_empty() && GV->hasLocalLinkage()) {
+        GV->eraseFromParent();
+        continue;
+      }
+
+      // Only process constants with initializers in the default address space.
+      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+          // Don't touch values marked with attribute(used).
+          UsedGlobals.count(GV))
+        continue;
+
+      // This transformation is legal for weak ODR globals in the sense it
+      // doesn't change semantics, but we really don't want to perform it
+      // anyway; it's likely to pessimize code generation, and some tools
+      // (like the Darwin linker in cases involving CFString) don't expect it.
+      if (GV->isWeakForLinker())
+        continue;
+
+      Constant *Init = GV->getInitializer();
+
+      // Check to see if the initializer is already known.
+      GlobalVariable *&Slot = CMap[Init];
+
+      // If this is the first constant we find or if the old one is local,
+      // replace with the current one. If the current one is externally
+      // visible it cannot be replaced, but it can be the canonical constant
+      // we merge with.
+      if (!Slot || IsBetterCanonical(*GV, *Slot))
+        Slot = GV;
+    }
+
+    // Second: identify all globals that can be merged together, filling in
+    // the Replacements vector. We cannot do the replacement in this pass
+    // because doing so may cause initializers of other globals to be
+    // rewritten, invalidating the Constant* pointers in CMap.
+    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+         GVI != E; ) {
+      GlobalVariable *GV = &*GVI++;
+
+      // Only process constants with initializers in the default address space.
+      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+          // Don't touch values marked with attribute(used).
+          UsedGlobals.count(GV))
+        continue;
+
+      // We can only replace constants with local linkage.
+      if (!GV->hasLocalLinkage())
+        continue;
+
+      Constant *Init = GV->getInitializer();
+
+      // Check to see if the initializer is already known.
+      GlobalVariable *Slot = CMap[Init];
+
+      if (!Slot || Slot == GV)
+        continue;
+
+      if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+        continue;
+
+      if (!GV->hasUnnamedAddr())
+        Slot->setUnnamedAddr(false);
+
+      // Make all uses of the duplicate constant use the canonical version.
+      Replacements.push_back(std::make_pair(GV, Slot));
+    }
+
+    if (Replacements.empty())
+      return MadeChange;
+    CMap.clear();
+
+    // Now that we have figured out which replacements must be made, do them
+    // all now. This avoids invalidating the pointers in CMap, which are
+    // unneeded now.
+    for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+      // Bump the alignment if necessary.
+      if (Replacements[i].first->getAlignment() ||
+          Replacements[i].second->getAlignment()) {
+        Replacements[i].second->setAlignment(
+            std::max(getAlignment(Replacements[i].first),
+                     getAlignment(Replacements[i].second)));
+      }
+
+      // Eliminate any uses of the dead global.
+      Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
+
+      // Delete the global value from the module.
+      assert(Replacements[i].first->hasLocalLinkage() &&
+             "Refusing to delete an externally visible global variable.");
+      Replacements[i].first->eraseFromParent();
+    }
+
+    NumMerged += Replacements.size();
+    Replacements.clear();
+  }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
new file mode 100644
index 0000000..5bbb751
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -0,0 +1,166 @@
+//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass exports all llvm.bitsets found in the module in the form of a
+// __cfi_check function, which can be used to verify cross-DSO call targets.
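+// The emitted function has the signature
+//   void __cfi_check(uint64 CallSiteTypeId, i8* Addr)
+// (built below in buildCFICheck) and traps unless Addr is a member of the
+// bitset identified by CallSiteTypeId.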
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "cross-dso-cfi" + +STATISTIC(TypeIds, "Number of unique type identifiers"); + +namespace { + +struct CrossDSOCFI : public ModulePass { + static char ID; + CrossDSOCFI() : ModulePass(ID) { + initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry()); + } + + Module *M; + MDNode *VeryLikelyWeights; + + ConstantInt *extractBitSetTypeId(MDNode *MD); + void buildCFICheck(); + + bool doInitialization(Module &M) override; + bool runOnModule(Module &M) override; +}; + +} // anonymous namespace + +INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, + false) +INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false) +char CrossDSOCFI::ID = 0; + +ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; } + +bool CrossDSOCFI::doInitialization(Module &Mod) { + M = &Mod; + VeryLikelyWeights = + MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1); + + return false; +} + +/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode. +ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) { + // This check excludes vtables for classes inside anonymous namespaces. + auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0)); + if (!TM) + return nullptr; + auto C = dyn_cast_or_null<ConstantInt>(TM->getValue()); + if (!C) return nullptr; + // We are looking for i64 constants. + if (C->getBitWidth() != 64) return nullptr; + + // Sanity check. + auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1)); + // Can be null if a function was removed by an optimization. + if (FM) { + auto F = dyn_cast<Function>(FM->getValue()); + // But can never be a function declaration. + assert(!F || !F->isDeclaration()); + (void)F; // Suppress unused variable warning in the no-asserts build. + } + return C; +} + +/// buildCFICheck - emits __cfi_check for the current module. +void CrossDSOCFI::buildCFICheck() { + // FIXME: verify that __cfi_check ends up near the end of the code section, + // but before the jump slots created in LowerBitSets. 
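+  // The generated code has roughly this shape (illustrative sketch, not
+  // actual syntax):
+  //   switch (CallSiteTypeId) {
+  //   case TypeId1: if (llvm.bitset.test(Addr, TypeId1)) goto exit; else goto trap;
+  //   ...
+  //   default: goto trap;
+  //   }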
+ llvm::DenseSet<uint64_t> BitSetIds; + NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets"); + + if (BitSetNM) + for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) + if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I))) + BitSetIds.insert(TypeId->getZExtValue()); + + LLVMContext &Ctx = M->getContext(); + Constant *C = M->getOrInsertFunction( + "__cfi_check", + FunctionType::get( + Type::getVoidTy(Ctx), + {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))}, + false)); + Function *F = dyn_cast<Function>(C); + F->setAlignment(4096); + auto args = F->arg_begin(); + Argument &CallSiteTypeId = *(args++); + CallSiteTypeId.setName("CallSiteTypeId"); + Argument &Addr = *(args++); + Addr.setName("Addr"); + assert(args == F->arg_end()); + + BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F); + + BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F); + IRBuilder<> IRBTrap(TrapBB); + Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap); + llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + IRBTrap.CreateUnreachable(); + + BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F); + IRBuilder<> IRBExit(ExitBB); + IRBExit.CreateRetVoid(); + + IRBuilder<> IRB(BB); + SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size()); + for (uint64_t TypeId : BitSetIds) { + ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId); + BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F); + IRBuilder<> IRBTest(TestBB); + Function *BitsetTestFn = + Intrinsic::getDeclaration(M, Intrinsic::bitset_test); + + Value *Test = IRBTest.CreateCall( + BitsetTestFn, {&Addr, MetadataAsValue::get( + Ctx, ConstantAsMetadata::get(CaseTypeId))}); + BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB); + BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights); + + SI->addCase(CaseTypeId, TestBB); + ++TypeIds; + } +} + +bool CrossDSOCFI::runOnModule(Module &M) { + if (M.getModuleFlag("Cross-DSO CFI") == nullptr) + return false; + buildCFICheck(); + return true; +} diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp new file mode 100644 index 0000000..4de3d95 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -0,0 +1,1133 @@ +//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass deletes dead arguments from internal functions. Dead argument +// elimination removes arguments which are directly dead, as well as arguments +// only passed into function calls as dead arguments of other functions. This +// pass also deletes dead return values in a similar way. +// +// This pass is often useful as a cleanup pass to run after aggressive +// interprocedural passes, which add possibly-dead arguments or return values. 
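+//
+// For example (illustrative): an internal function
+//   static int f(int used, int unused) { return used; }
+// whose second argument is never read is rewritten, along with all of its
+// callers, to the single-argument form.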
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <map> +#include <set> +#include <tuple> +using namespace llvm; + +#define DEBUG_TYPE "deadargelim" + +STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); +STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); +STATISTIC(NumArgumentsReplacedWithUndef, + "Number of unread args replaced with undef"); +namespace { + /// DAE - The dead argument elimination pass. + /// + class DAE : public ModulePass { + public: + + /// Struct that represents (part of) either a return value or a function + /// argument. Used so that arguments and return values can be used + /// interchangeably. + struct RetOrArg { + RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), + IsArg(IsArg) {} + const Function *F; + unsigned Idx; + bool IsArg; + + /// Make RetOrArg comparable, so we can put it into a map. + bool operator<(const RetOrArg &O) const { + return std::tie(F, Idx, IsArg) < std::tie(O.F, O.Idx, O.IsArg); + } + + /// Make RetOrArg comparable, so we can easily iterate the multimap. + bool operator==(const RetOrArg &O) const { + return F == O.F && Idx == O.Idx && IsArg == O.IsArg; + } + + std::string getDescription() const { + return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) + + " of function " + F->getName()).str(); + } + }; + + /// Liveness enum - During our initial pass over the program, we determine + /// that things are either alive or maybe alive. We don't mark anything + /// explicitly dead (even if we know they are), since anything not alive + /// with no registered uses (in Uses) will never be marked alive and will + /// thus become dead in the end. + enum Liveness { Live, MaybeLive }; + + /// Convenience wrapper + RetOrArg CreateRet(const Function *F, unsigned Idx) { + return RetOrArg(F, Idx, false); + } + /// Convenience wrapper + RetOrArg CreateArg(const Function *F, unsigned Idx) { + return RetOrArg(F, Idx, true); + } + + typedef std::multimap<RetOrArg, RetOrArg> UseMap; + /// This maps a return value or argument to any MaybeLive return values or + /// arguments it uses. This allows the MaybeLive values to be marked live + /// when any of its users is marked live. + /// For example (indices are left out for clarity): + /// - Uses[ret F] = ret G + /// This means that F calls G, and F returns the value returned by G. + /// - Uses[arg F] = ret G + /// This means that some function calls G and passes its result as an + /// argument to F. + /// - Uses[ret F] = arg F + /// This means that F returns one of its own arguments. + /// - Uses[arg F] = arg G + /// This means that G calls F and passes one of its own (G's) arguments + /// directly to F. 
+    UseMap Uses;
+
+    typedef std::set<RetOrArg> LiveSet;
+    typedef std::set<const Function*> LiveFuncSet;
+
+    /// This set contains all values that have been determined to be live.
+    LiveSet LiveValues;
+    /// This set contains all functions that cannot be changed in any way.
+    LiveFuncSet LiveFunctions;
+
+    typedef SmallVector<RetOrArg, 5> UseVector;
+
+  protected:
+    // DAH uses this to specify a different ID.
+    explicit DAE(char &ID) : ModulePass(ID) {}
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    DAE() : ModulePass(ID) {
+      initializeDAEPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnModule(Module &M) override;
+
+    virtual bool ShouldHackArguments() const { return false; }
+
+  private:
+    Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+    Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
+                       unsigned RetValNum = -1U);
+    Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+
+    void SurveyFunction(const Function &F);
+    void MarkValue(const RetOrArg &RA, Liveness L,
+                   const UseVector &MaybeLiveUses);
+    void MarkLive(const RetOrArg &RA);
+    void MarkLive(const Function &F);
+    void PropagateLiveness(const RetOrArg &RA);
+    bool RemoveDeadStuffFromFunction(Function *F);
+    bool DeleteDeadVarargs(Function &Fn);
+    bool RemoveDeadArgumentsFromCallers(Function &Fn);
+  };
+}
+
+
+char DAE::ID = 0;
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
+
+namespace {
+  /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+  /// deletes arguments to functions which are external. This is only for use
+  /// by bugpoint.
+  struct DAH : public DAE {
+    static char ID;
+    DAH() : DAE(ID) {}
+
+    bool ShouldHackArguments() const override { return true; }
+  };
+}
+
+char DAH::ID = 0;
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+                "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+                false, false)
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// DeleteDeadVarargs - If this is a function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+  assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+  if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+  // Ensure that the function is only directly called.
+  if (Fn.hasAddressTaken())
+    return false;
+
+  // Don't touch naked functions. The assembly might be using an argument, or
+  // otherwise rely on the frame layout in a way that this analysis will not
+  // see.
+  if (Fn.hasFnAttribute(Attribute::Naked)) {
+    return false;
+  }
+
+  // Okay, we know we can transform this function if it is safe to do so. Scan
+  // its body looking for calls marked musttail or calls to llvm.vastart.
+  for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      CallInst *CI = dyn_cast<CallInst>(I);
+      if (!CI)
+        continue;
+      if (CI->isMustTailCall())
+        return false;
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+        if (II->getIntrinsicID() == Intrinsic::vastart)
+          return false;
+      }
+    }
+  }
+
+  // If we get here, there are no calls to llvm.vastart in the function body;
+  // remove the "..." and adjust all the calls.
+
+  // Start by computing a new prototype for the function, which is the same as
+  // the old function, but doesn't have isVarArg set.
+  FunctionType *FTy = Fn.getFunctionType();
+
+  std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
+  FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+                                         Params, false);
+  unsigned NumArgs = Params.size();
+
+  // Create the new function body and insert it into the module...
+  Function *NF = Function::Create(NFTy, Fn.getLinkage());
+  NF->copyAttributesFrom(&Fn);
+  Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
+  NF->takeName(&Fn);
+
+  // Loop over all of the callers of the function, transforming the call sites
+  // to pass in a smaller number of arguments into the new function.
+  //
+  std::vector<Value*> Args;
+  for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
+    CallSite CS(*I++);
+    if (!CS)
+      continue;
+    Instruction *Call = CS.getInstruction();
+
+    // Pass all the same arguments.
+    Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
+
+    // Drop any attributes that were on the vararg arguments.
+    AttributeSet PAL = CS.getAttributes();
+    if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
+      SmallVector<AttributeSet, 8> AttributesVec;
+      for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
+        AttributesVec.push_back(PAL.getSlotAttributes(i));
+      if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+        AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
+                                                  PAL.getFnAttributes()));
+      PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
+    }
+
+    Instruction *New;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+                               Args, "", Call);
+      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+      cast<InvokeInst>(New)->setAttributes(PAL);
+    } else {
+      New = CallInst::Create(NF, Args, "", Call);
+      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+      cast<CallInst>(New)->setAttributes(PAL);
+      if (cast<CallInst>(Call)->isTailCall())
+        cast<CallInst>(New)->setTailCall();
+    }
+    New->setDebugLoc(Call->getDebugLoc());
+
+    Args.clear();
+
+    if (!Call->use_empty())
+      Call->replaceAllUsesWith(New);
+
+    New->takeName(Call);
+
+    // Finally, remove the old call from the program, reducing the use-count of
+    // F.
+    Call->eraseFromParent();
+  }
+
+  // Since we have now created the new function, splice the body of the old
+  // function right into the new function, leaving the old rotting hulk of the
+  // function empty.
+  NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments, transferring the names over as well.
+  for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
+       I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+    // Move the name and users over to the new version.
+    I->replaceAllUsesWith(&*I2);
+    I2->takeName(&*I);
+  }
+
+  // Patch the pointer to LLVM function in debug info descriptor.
+  NF->setSubprogram(Fn.getSubprogram());
+
+  // Fix up any BlockAddresses that refer to the function.
+  Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
+  // Delete the bitcast that we just created, so that NF does not
+  // appear to be address-taken.
+  NF->removeDeadConstantUsers();
+  // Finally, nuke the old function.
+  Fn.eraseFromParent();
+  return true;
+}
+
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// arguments that are unused, and changes the corresponding caller arguments
+/// to be undef instead.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
+{
+  // We cannot change the arguments if this TU does not define the function or
+  // if the linker may choose a function body from another TU, even if the
+  // nominal linkage indicates that other copies of the function have the same
+  // semantics. In the example below, the dead load from %p may not have been
+  // eliminated from the linker-chosen copy of f, so replacing %p with undef
+  // in callers may introduce undefined behavior.
+  //
+  // define linkonce_odr void @f(i32* %p) {
+  //   %v = load i32 %p
+  //   ret void
+  // }
+  if (!Fn.isStrongDefinitionForLinker())
+    return false;
+
+  // Functions with local linkage should already have been handled, except the
+  // fragile (variadic) ones which we can improve here.
+  if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
+    return false;
+
+  // Don't touch naked functions. The assembly might be using an argument, or
+  // otherwise rely on the frame layout in a way that this analysis will not
+  // see.
+  if (Fn.hasFnAttribute(Attribute::Naked))
+    return false;
+
+  if (Fn.use_empty())
+    return false;
+
+  SmallVector<unsigned, 8> UnusedArgs;
+  for (Argument &Arg : Fn.args()) {
+    if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+      UnusedArgs.push_back(Arg.getArgNo());
+  }
+
+  if (UnusedArgs.empty())
+    return false;
+
+  bool Changed = false;
+
+  for (Use &U : Fn.uses()) {
+    CallSite CS(U.getUser());
+    if (!CS || !CS.isCallee(&U))
+      continue;
+
+    // Now go through all unused args and replace them with "undef".
+    for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+      unsigned ArgNo = UnusedArgs[I];
+
+      Value *Arg = CS.getArgument(ArgNo);
+      CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+      ++NumArgumentsReplacedWithUndef;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// Convenience function that returns the number of return values. It returns
+/// 0 for void functions, the number of elements for functions returning a
+/// struct or an array, and 1 otherwise.
+static unsigned NumRetVals(const Function *F) {
+  Type *RetTy = F->getReturnType();
+  if (RetTy->isVoidTy())
+    return 0;
+  else if (StructType *STy = dyn_cast<StructType>(RetTy))
+    return STy->getNumElements();
+  else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+    return ATy->getNumElements();
+  else
+    return 1;
+}
+
+/// Returns the sub-type a function will return at a given Idx. Should
+/// correspond to the result type of an ExtractValue instruction executed with
+/// just that one Idx (i.e. only the top-level structure is considered).
+static Type *getRetComponentType(const Function *F, unsigned Idx) {
+  Type *RetTy = F->getReturnType();
+  assert(!RetTy->isVoidTy() && "void type has no subtype");
+
+  if (StructType *STy = dyn_cast<StructType>(RetTy))
+    return STy->getElementType(Idx);
+  else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+    return ATy->getElementType();
+  else
+    return RetTy;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+  // We're live if our use or its Function is already marked as live.
+  if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+    return Live;
+
+  // We're maybe live otherwise, but remember that we must become live if
+  // Use becomes live.
+  MaybeLiveUses.push_back(Use);
+  return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeLive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. This is used in the recursion; outside callers should
+/// always leave it at its default value (-1U).
+DAE::Liveness DAE::SurveyUse(const Use *U,
+                             UseVector &MaybeLiveUses, unsigned RetValNum) {
+  const User *V = U->getUser();
+  if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+    // The value is returned from a function. It's only live when the
+    // function's return value is live. We use RetValNum here, for the case
+    // that U is really a use of an insertvalue instruction that uses the
+    // original Use.
+    const Function *F = RI->getParent()->getParent();
+    if (RetValNum != -1U) {
+      RetOrArg Use = CreateRet(F, RetValNum);
+      // We might be live, depending on the liveness of Use.
+      return MarkIfNotLive(Use, MaybeLiveUses);
+    } else {
+      DAE::Liveness Result = MaybeLive;
+      for (unsigned i = 0; i < NumRetVals(F); ++i) {
+        RetOrArg Use = CreateRet(F, i);
+        // We might be live, depending on the liveness of Use. If any
+        // sub-value is live, then the entire value is considered live. This
+        // is a conservative choice, and better tracking is possible.
+        DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+        if (Result != Live)
+          Result = SubResult;
+      }
+      return Result;
+    }
+  }
+  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+    if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+        && IV->hasIndices())
+      // The use we are examining is inserted into an aggregate. Our liveness
+      // depends on all uses of that aggregate, but if it is used as a return
+      // value, only the index at which we were inserted counts.
+      RetValNum = *IV->idx_begin();
+
+    // Note that if we are used as the aggregate operand to the insertvalue,
+    // we don't change RetValNum, but do survey all our uses.
+
+    Liveness Result = MaybeLive;
+    for (const Use &UU : IV->uses()) {
+      Result = SurveyUse(&UU, MaybeLiveUses, RetValNum);
+      if (Result == Live)
+        break;
+    }
+    return Result;
+  }
+
+  if (auto CS = ImmutableCallSite(V)) {
+    const Function *F = CS.getCalledFunction();
+    if (F) {
+      // Used in a direct call.
+
+      // The function argument is live if it is used as a bundle operand.
+      if (CS.isBundleOperand(U))
+        return Live;
+
+      // Find the argument number. We know for sure that this use is an
+      // argument, since if it were the function operand this would be an
+      // indirect call, and we know we can't be looking at a value of label
+      // type (as in an invoke instruction's successor blocks).
+      unsigned ArgNo = CS.getArgumentNo(U);
+
+      if (ArgNo >= F->getFunctionType()->getNumParams())
+        // The value is passed in through a vararg! Must be live.
+        return Live;
+
+      assert(CS.getArgument(ArgNo)
+             == CS->getOperand(U->getOperandNo())
+             && "Argument is not where we expected it");
+
+      // Value passed to a normal call. It's only live when the corresponding
+      // argument to the called function turns out live.
+      RetOrArg Use = CreateArg(F, ArgNo);
+      return MarkIfNotLive(Use, MaybeLiveUses);
+    }
+  }
+  // Used in any other way? Value must be live.
+  return Live;
+}
+
+/// SurveyUses - This looks at all the uses of the given value.
+/// Returns the Liveness deduced from the uses of this value.
+///
+/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If
+/// the result is Live, MaybeLiveUses might be modified but its content should
+/// be ignored (since it might not be complete).
+DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
+  // Assume it's dead (which will only hold if there are no uses at all).
+  Liveness Result = MaybeLive;
+  // Check each use.
+  for (const Use &U : V->uses()) {
+    Result = SurveyUse(&U, MaybeLiveUses);
+    if (Result == Live)
+      break;
+  }
+  return Result;
+}
+
+// SurveyFunction - This performs the initial survey of the specified function,
+// checking whether it uses any of its incoming arguments or whether any
+// callers use the return value. This fills in the LiveValues set and Uses
+// map.
+//
+// We consider arguments of non-internal functions to be intrinsically alive as
+// well as arguments to functions which have their "address taken".
+//
+void DAE::SurveyFunction(const Function &F) {
+  // Functions with inalloca parameters are expecting args in a particular
+  // register and memory layout.
+  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
+    MarkLive(F);
+    return;
+  }
+
+  // Don't touch naked functions. The assembly might be using an argument, or
+  // otherwise rely on the frame layout in a way that this analysis will not
+  // see.
+  if (F.hasFnAttribute(Attribute::Naked)) {
+    MarkLive(F);
+    return;
+  }
+
+  unsigned RetCount = NumRetVals(&F);
+  // Assume all return values are dead
+  typedef SmallVector<Liveness, 5> RetVals;
+  RetVals RetValLiveness(RetCount, MaybeLive);
+
+  typedef SmallVector<UseVector, 5> RetUses;
+  // These vectors map each return value to the uses that make it MaybeLive, so
+  // we can add those to the Uses map if the return value really turns out to
+  // be MaybeLive. Initialized to a list of RetCount empty lists.
+  RetUses MaybeLiveRetUses(RetCount);
+
+  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+      if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
+          != F.getFunctionType()->getReturnType()) {
+        // We don't support old style multiple return values.
+        MarkLive(F);
+        return;
+      }
+
+  if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+    MarkLive(F);
+    return;
+  }
+
+  DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+  // Keep track of the number of live retvals, so we can skip checks once all
+  // of them turn out to be live.
+  unsigned NumLiveRetVals = 0;
+  // Loop over all uses of the function.
+  for (const Use &U : F.uses()) {
+    // If the function is PASSED IN as an argument, its address has been
+    // taken.
+    ImmutableCallSite CS(U.getUser());
+    if (!CS || !CS.isCallee(&U)) {
+      MarkLive(F);
+      return;
+    }
+
+    // If this use is anything other than a call site, the function is alive.
+    const Instruction *TheCall = CS.getInstruction();
+    if (!TheCall) {   // Not a direct call site?
+      MarkLive(F);
+      return;
+    }
+
+    // If we end up here, we are looking at a direct call to our function.
+
+    // Now, check how our return value(s) is/are used in this caller. Don't
+    // bother checking return values if all of them are live already.
+    if (NumLiveRetVals == RetCount)
+      continue;
+
+    // Check all uses of the return value.
+    for (const Use &U : TheCall->uses()) {
+      if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+        // This use uses a part of our return value, survey the uses of
+        // that part and store the results for this index only.
+        unsigned Idx = *Ext->idx_begin();
+        if (RetValLiveness[Idx] != Live) {
+          RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+          if (RetValLiveness[Idx] == Live)
+            NumLiveRetVals++;
+        }
+      } else {
+        // Used by something other than extractvalue. Survey, but assume that
+        // the result applies to all sub-values.
+        UseVector MaybeLiveAggregateUses;
+        if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
+          NumLiveRetVals = RetCount;
+          RetValLiveness.assign(RetCount, Live);
+          break;
+        } else {
+          for (unsigned i = 0; i != RetCount; ++i) {
+            if (RetValLiveness[i] != Live)
+              MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
+                                         MaybeLiveAggregateUses.end());
+          }
+        }
+      }
+    }
+  }
+
+  // Now we've inspected all callers, record the liveness of our return values.
+  for (unsigned i = 0; i != RetCount; ++i)
+    MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+
+  DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+
+  // Now, check all of our arguments.
+  unsigned i = 0;
+  UseVector MaybeLiveArgUses;
+  for (Function::const_arg_iterator AI = F.arg_begin(),
+       E = F.arg_end(); AI != E; ++AI, ++i) {
+    Liveness Result;
+    if (F.getFunctionType()->isVarArg()) {
+      // Variadic functions will already have a va_arg function expanded
+      // inside them, making them potentially very sensitive to ABI changes
+      // resulting from removing arguments entirely, so don't remove arguments
+      // from them. For example AArch64 handles register and stack HFAs very
+      // differently, and this is reflected in the IR which has already been
+      // generated.
+      Result = Live;
+    } else {
+      // See what the effect of this use is (recording any uses that cause
+      // MaybeLive in MaybeLiveArgUses).
+      Result = SurveyUses(&*AI, MaybeLiveArgUses);
+    }
+
+    // Mark the result.
+    MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+    // Clear the vector again for the next iteration.
+    MaybeLiveArgUses.clear();
+  }
+}
+
+/// MarkValue - This function marks the liveness of RA depending on L. If L is
+/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in
+/// Uses, such that RA will be marked live if any use in MaybeLiveUses gets
+/// marked live later on.
+void DAE::MarkValue(const RetOrArg &RA, Liveness L,
+                    const UseVector &MaybeLiveUses) {
+  switch (L) {
+    case Live: MarkLive(RA); break;
+    case MaybeLive:
+    {
+      // Note any uses of this value, so this return value can be
+      // marked live whenever one of the uses becomes live.
+      for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
+           UE = MaybeLiveUses.end(); UI != UE; ++UI)
+        Uses.insert(std::make_pair(*UI, RA));
+      break;
+    }
+  }
+}
+
+/// MarkLive - Mark the given Function as alive, meaning that it cannot be
+/// changed in any way. Additionally, mark any values that are used as this
+/// function's parameters or by its return values (according to Uses) live as
+/// well.
+void DAE::MarkLive(const Function &F) {
+  DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+  // Mark the function as live.
+  LiveFunctions.insert(&F);
+  // Mark all arguments as live.
+  for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+    PropagateLiveness(CreateArg(&F, i));
+  // Mark all return values as live.
+  for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+    PropagateLiveness(CreateRet(&F, i));
+}
+
+/// MarkLive - Mark the given return value or argument as live. Additionally,
+/// mark any values that are used by this value (according to Uses) live as
+/// well.
+void DAE::MarkLive(const RetOrArg &RA) {
+  if (LiveFunctions.count(RA.F))
+    return; // Function was already marked Live.
+
+  if (!LiveValues.insert(RA).second)
+    return; // We were already marked Live.
+
+  DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
+  PropagateLiveness(RA);
+}
+
+/// PropagateLiveness - Given that RA is a live value, propagate its liveness
+/// to any other values it uses (according to Uses).
+void DAE::PropagateLiveness(const RetOrArg &RA) {
+  // We don't use upper_bound (or equal_range) here, because our recursive call
+  // to ourselves is likely to cause the upper_bound (which is the first value
+  // not belonging to RA) to become erased and the iterator invalidated.
+  UseMap::iterator Begin = Uses.lower_bound(RA);
+  UseMap::iterator E = Uses.end();
+  UseMap::iterator I;
+  for (I = Begin; I != E && I->first == RA; ++I)
+    MarkLive(I->second);
+
+  // Erase RA from the Uses map (from the lower bound to wherever we ended up
+  // after the loop).
+  Uses.erase(Begin, I);
+}
+
+// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
+// that are not in LiveValues. Transform the function and all of the callers of
+// the function to not have these arguments and return values.
+//
+bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+  // Don't modify fully live functions
+  if (LiveFunctions.count(F))
+    return false;
+
+  // Start by computing a new prototype for the function, which is the same as
+  // the old function, but has fewer arguments and a different return type.
+  FunctionType *FTy = F->getFunctionType();
+  std::vector<Type*> Params;
+
+  // Keep track of if we have a live 'returned' argument
+  bool HasLiveReturnedArg = false;
+
+  // Set up to build a new list of parameter attributes.
+  SmallVector<AttributeSet, 8> AttributesVec;
+  const AttributeSet &PAL = F->getAttributes();
+
+  // Remember which arguments are still alive.
+  SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+  // Construct the new parameter list from non-dead arguments. Also construct
+  // a new set of parameter attributes to correspond. Skip the first parameter
+  // attribute, since that belongs to the return value.
+  unsigned i = 0;
+  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; ++I, ++i) {
+    RetOrArg Arg = CreateArg(F, i);
+    if (LiveValues.erase(Arg)) {
+      Params.push_back(I->getType());
+      ArgAlive[i] = true;
+
+      // Get the original parameter attributes (skipping the first one, which
+      // is for the return value).
+      if (PAL.hasAttributes(i + 1)) {
+        AttrBuilder B(PAL, i + 1);
+        if (B.contains(Attribute::Returned))
+          HasLiveReturnedArg = true;
+        AttributesVec.
+          push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+      }
+    } else {
+      ++NumArgumentsEliminated;
+      DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
+            << ") from " << F->getName() << "\n");
+    }
+  }
+
+  // Find out the new return value.
+ Type *RetTy = FTy->getReturnType(); + Type *NRetTy = nullptr; + unsigned RetCount = NumRetVals(F); + + // -1 means unused, other numbers are the new index + SmallVector<int, 5> NewRetIdxs(RetCount, -1); + std::vector<Type*> RetTypes; + + // If there is a function with a live 'returned' argument but a dead return + // value, then there are two possible actions: + // 1) Eliminate the return value and take off the 'returned' attribute on the + // argument. + // 2) Retain the 'returned' attribute and treat the return value (but not the + // entire function) as live so that it is not eliminated. + // + // It's not clear in the general case which option is more profitable because, + // even in the absence of explicit uses of the return value, code generation + // is free to use the 'returned' attribute to do things like eliding + // save/restores of registers across calls. Whether or not this happens is + // target and ABI-specific as well as depending on the amount of register + // pressure, so there's no good way for an IR-level pass to figure this out. + // + // Fortunately, the only places where 'returned' is currently generated by + // the FE are places where 'returned' is basically free and almost always a + // performance win, so the second option can just be used always for now. + // + // This should be revisited if 'returned' is ever applied more liberally. + if (RetTy->isVoidTy() || HasLiveReturnedArg) { + NRetTy = RetTy; + } else { + // Look at each of the original return values individually. + for (unsigned i = 0; i != RetCount; ++i) { + RetOrArg Ret = CreateRet(F, i); + if (LiveValues.erase(Ret)) { + RetTypes.push_back(getRetComponentType(F, i)); + NewRetIdxs[i] = RetTypes.size() - 1; + } else { + ++NumRetValsEliminated; + DEBUG(dbgs() << "DAE - Removing return value " << i << " from " + << F->getName() << "\n"); + } + } + if (RetTypes.size() > 1) { + // More than one return type? Reduce it down to size. + if (StructType *STy = dyn_cast<StructType>(RetTy)) { + // Make the new struct packed if we used to return a packed struct + // already. + NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked()); + } else { + assert(isa<ArrayType>(RetTy) && "unexpected multi-value return"); + NRetTy = ArrayType::get(RetTypes[0], RetTypes.size()); + } + } else if (RetTypes.size() == 1) + // One return type? Just a simple value then, but only if we didn't use to + // return a struct with that simple value before. + NRetTy = RetTypes.front(); + else if (RetTypes.size() == 0) + // No return types? Make it void, but only if we didn't use to return {}. + NRetTy = Type::getVoidTy(F->getContext()); + } + + assert(NRetTy && "No new return type found?"); + + // The existing function return attributes. + AttributeSet RAttrs = PAL.getRetAttributes(); + + // Remove any incompatible attributes, but only if we removed all return + // values. Otherwise, ensure that we don't have any conflicting attributes + // here. Currently, this should not be possible, but special handling might be + // required when new return value attributes are added. + if (NRetTy->isVoidTy()) + RAttrs = RAttrs.removeAttributes(NRetTy->getContext(), + AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NRetTy)); + else + assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex). 
+           overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
+           "Return attributes no longer compatible?");
+
+  if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+    AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
+
+  if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+    AttributesVec.push_back(AttributeSet::get(F->getContext(),
+                                              PAL.getFnAttributes()));
+
+  // Reconstruct the AttributesList based on the vector we constructed.
+  AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
+
+  // Create the new function type based on the recomputed parameters.
+  FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+
+  // No change?
+  if (NFTy == FTy)
+    return false;
+
+  // Create the new function body and insert it into the module...
+  Function *NF = Function::Create(NFTy, F->getLinkage());
+  NF->copyAttributesFrom(F);
+  NF->setAttributes(NewPAL);
+  // Insert the new function before the old function, so we won't be processing
+  // it again.
+  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
+  NF->takeName(F);
+
+  // Loop over all of the callers of the function, transforming the call sites
+  // to pass in a smaller number of arguments into the new function.
+  //
+  std::vector<Value*> Args;
+  while (!F->use_empty()) {
+    CallSite CS(F->user_back());
+    Instruction *Call = CS.getInstruction();
+
+    AttributesVec.clear();
+    const AttributeSet &CallPAL = CS.getAttributes();
+
+    // The call return attributes.
+    AttributeSet RAttrs = CallPAL.getRetAttributes();
+
+    // Adjust in case the function was changed to return void.
+    RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+                                     AttributeSet::ReturnIndex,
+                        AttributeFuncs::typeIncompatible(NF->getReturnType()));
+    if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+      AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
+
+    // Declare these outside of the loops, so we can reuse them for the second
+    // loop, which loops over the varargs.
+    CallSite::arg_iterator I = CS.arg_begin();
+    unsigned i = 0;
+    // Loop over those operands, corresponding to the normal arguments to the
+    // original function, and add those that are still alive.
+    for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
+      if (ArgAlive[i]) {
+        Args.push_back(*I);
+        // Get original parameter attributes, but skip return attributes.
+        if (CallPAL.hasAttributes(i + 1)) {
+          AttrBuilder B(CallPAL, i + 1);
+          // If the return type has changed, then get rid of 'returned' on the
+          // call site. The alternative is to make all 'returned' attributes on
+          // call sites keep the return value alive just like 'returned'
+          // attributes on function declarations, but it's less clearly a win
+          // and this is not an expected case anyway.
+          if (NRetTy != RetTy && B.contains(Attribute::Returned))
+            B.removeAttribute(Attribute::Returned);
+          AttributesVec.
+            push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+        }
+      }
+
+    // Push any varargs arguments on the list. Don't forget their attributes.
+    for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+      Args.push_back(*I);
+      if (CallPAL.hasAttributes(i + 1)) {
+        AttrBuilder B(CallPAL, i + 1);
+        AttributesVec.
+          push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+      }
+    }
+
+    if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+      AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+                                                CallPAL.getFnAttributes()));
+
+    // Reconstruct the AttributesList based on the vector we constructed.
+ AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, "", Call->getParent()); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + cast<InvokeInst>(New)->setAttributes(NewCallPAL); + } else { + New = CallInst::Create(NF, Args, "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + cast<CallInst>(New)->setAttributes(NewCallPAL); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + New->setDebugLoc(Call->getDebugLoc()); + + Args.clear(); + + if (!Call->use_empty()) { + if (New->getType() == Call->getType()) { + // Return type not changed? Just replace users then. + Call->replaceAllUsesWith(New); + New->takeName(Call); + } else if (New->getType()->isVoidTy()) { + // Our return value has uses, but they will get removed later on. + // Replace by null for now. + if (!Call->getType()->isX86_MMXTy()) + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + } else { + assert((RetTy->isStructTy() || RetTy->isArrayTy()) && + "Return type changed, but not into a void. The old return type" + " must have been a struct or an array!"); + Instruction *InsertPt = Call; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest()); + InsertPt = &*NewEdge->getFirstInsertionPt(); + } + + // We used to return a struct or array. Instead of doing smart stuff + // with all the uses, we will just rebuild it using extract/insertvalue + // chaining and let instcombine clean that up. + // + // Start out building up our return value from undef + Value *RetVal = UndefValue::get(RetTy); + for (unsigned i = 0; i != RetCount; ++i) + if (NewRetIdxs[i] != -1) { + Value *V; + if (RetTypes.size() > 1) + // We are still returning a struct, so extract the value from our + // return value + V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret", + InsertPt); + else + // We are now returning a single element, so just insert that + V = New; + // Insert the value at the old position + RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt); + } + // Now, replace all uses of the old call instruction with the return + // struct we built + Call->replaceAllUsesWith(RetVal); + New->takeName(Call); + } + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. + i = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I, ++i) + if (ArgAlive[i]) { + // If this is a live argument, move the name and users over to the new + // version. + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); + ++I2; + } else { + // If this argument is dead, replace any uses of it with null constants + // (these are guaranteed to become unused later on). 
+        if (!I->getType()->isX86_MMXTy())
+          I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+      }
+
+  // If we change the return value of the function we must rewrite any return
+  // instructions. Check this now.
+  if (F->getReturnType() != NF->getReturnType())
+    for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
+      if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+        Value *RetVal;
+
+        if (NFTy->getReturnType()->isVoidTy()) {
+          RetVal = nullptr;
+        } else {
+          assert(RetTy->isStructTy() || RetTy->isArrayTy());
+          // The original return value was a struct or array, insert
+          // extractvalue/insertvalue chains to extract only the values we need
+          // to return and insert them into our new result.
+          // This does generate messy code, but we'll leave it to instcombine
+          // to clean that up.
+          Value *OldRet = RI->getOperand(0);
+          // Start out building up our return value from undef
+          RetVal = UndefValue::get(NRetTy);
+          for (unsigned i = 0; i != RetCount; ++i)
+            if (NewRetIdxs[i] != -1) {
+              ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
+                                                              "oldret", RI);
+              if (RetTypes.size() > 1) {
+                // We're still returning a struct, so reinsert the value into
+                // our new return value at the new index
+
+                RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
+                                                 "newret", RI);
+              } else {
+                // We are now only returning a simple value, so just return the
+                // extracted value.
+                RetVal = EV;
+              }
+            }
+        }
+        // Replace the return instruction with one returning the new return
+        // value (possibly null if we became void).
+        ReturnInst::Create(F->getContext(), RetVal, RI);
+        BB->getInstList().erase(RI);
+      }
+
+  // Patch the pointer to LLVM function in debug info descriptor.
+  NF->setSubprogram(F->getSubprogram());
+
+  // Now that the old function is dead, delete it.
+  F->eraseFromParent();
+
+  return true;
+}
+
+bool DAE::runOnModule(Module &M) {
+  bool Changed = false;
+
+  // First pass: Do a simple check to see if any functions can have their "..."
+  // removed. We can do this if they never call va_start. This loop cannot be
+  // fused with the next loop, because deleting a function invalidates
+  // information computed while surveying other functions.
+  DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    Function &F = *I++;
+    if (F.getFunctionType()->isVarArg())
+      Changed |= DeleteDeadVarargs(F);
+  }
+
+  // Second pass: loop through the module, determining which arguments are
+  // live. We assume all arguments are dead unless proven otherwise (allowing
+  // us to determine that dead arguments passed into recursive functions are
+  // dead).
+  //
+  DEBUG(dbgs() << "DAE - Determining liveness\n");
+  for (auto &F : M)
+    SurveyFunction(F);
+
+  // Now, remove all dead arguments and return values from each function in
+  // turn.
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    // Increment now, because the function will probably get removed (i.e.,
+    // replaced by a new one).
+    Function *F = &*I++;
+    Changed |= RemoveDeadStuffFromFunction(F);
+  }
+
+  // Finally, look for any unused parameters in functions with non-local
+  // linkage and replace the passed-in parameters with undef.
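+  //
+  // For illustration (editorial; hypothetical IR, not from this patch): given
+  //
+  //   define void @f(i32 %unused) { ret void }
+  //   ...
+  //   call void @f(i32 %x)
+  //
+  // the call site becomes "call void @f(i32 undef)", so that %x, and whatever
+  // computes it, may later be found dead.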
+  for (auto &F : M)
+    Changed |= RemoveDeadArgumentsFromCallers(F);
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
new file mode 100644
index 0000000..af313a6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -0,0 +1,84 @@
+//===-- ElimAvailExtern.cpp - Eliminate available external globals --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate available external global
+// definitions from the program, turning them into declarations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "elim-avail-extern"
+
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+struct EliminateAvailableExternally : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+  EliminateAvailableExternally() : ModulePass(ID) {
+    initializeEliminateAvailableExternallyPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  // run - Do the EliminateAvailableExternally pass on the specified module.
+  //
+  bool runOnModule(Module &M) override;
+};
+}
+
+char EliminateAvailableExternally::ID = 0;
+INITIALIZE_PASS(EliminateAvailableExternally, "elim-avail-extern",
+                "Eliminate Available Externally Globals", false, false)
+
+ModulePass *llvm::createEliminateAvailableExternallyPass() {
+  return new EliminateAvailableExternally();
+}
+
+bool EliminateAvailableExternally::runOnModule(Module &M) {
+  bool Changed = false;
+
+  // Drop initializers of available externally global variables.
+  for (GlobalVariable &GV : M.globals()) {
+    if (!GV.hasAvailableExternallyLinkage())
+      continue;
+    if (GV.hasInitializer()) {
+      Constant *Init = GV.getInitializer();
+      GV.setInitializer(nullptr);
+      if (isSafeToDestroyConstant(Init))
+        Init->destroyConstant();
+    }
+    GV.removeDeadConstantUsers();
+    GV.setLinkage(GlobalValue::ExternalLinkage);
+    NumVariables++;
+    Changed = true;
+  }
+
+  // Drop the bodies of available externally functions.
+  for (Function &F : M) {
+    if (!F.hasAvailableExternallyLinkage())
+      continue;
+    if (!F.isDeclaration())
+      // This will set the linkage to external.
+      F.deleteBody();
+    F.removeDeadConstantUsers();
+    NumFunctions++;
+    Changed = true;
+  }
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 0000000..1a3b925
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,160 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+using namespace llvm;
+
+/// Make sure GV is visible from both modules. Delete is true if it is
+/// being deleted from this module.
+/// This also makes sure GV cannot be dropped so that references from
+/// the split module remain valid.
+static void makeVisible(GlobalValue &GV, bool Delete) {
+  bool Local = GV.hasLocalLinkage();
+  if (Local || Delete) {
+    GV.setLinkage(GlobalValue::ExternalLinkage);
+    if (Local)
+      GV.setVisibility(GlobalValue::HiddenVisibility);
+    return;
+  }
+
+  if (!GV.hasLinkOnceLinkage()) {
+    assert(!GV.isDiscardableIfUnused());
+    return;
+  }
+
+  // Map linkonce* to weak* so that llvm doesn't drop this GV.
+  switch(GV.getLinkage()) {
+  default:
+    llvm_unreachable("Unexpected linkage");
+  case GlobalValue::LinkOnceAnyLinkage:
+    GV.setLinkage(GlobalValue::WeakAnyLinkage);
+    return;
+  case GlobalValue::LinkOnceODRLinkage:
+    GV.setLinkage(GlobalValue::WeakODRLinkage);
+    return;
+  }
+}
+
+namespace {
+  /// @brief A pass to extract specific functions and their dependencies.
+  class GVExtractorPass : public ModulePass {
+    SetVector<GlobalValue *> Named;
+    bool deleteStuff;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+
+    /// GVExtractorPass - If deleteS is true, this pass deletes the specified
+    /// global values. Otherwise, it deletes as much of the module as possible,
+    /// except for the global values specified.
+    ///
+    explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+      : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
+
+    bool runOnModule(Module &M) override {
+      // Visit the global inline asm.
+      if (!deleteStuff)
+        M.setModuleInlineAsm("");
+
+      // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+      // implementation could figure out which GlobalValues are actually
+      // referenced by the Named set, and which GlobalValues in the rest of
+      // the module are referenced by the NamedSet, and get away with leaving
+      // more internal and private things internal and private. But for now,
+      // be conservative and simple.
+
+      // Visit the GlobalVariables.
+      for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+           I != E; ++I) {
+        bool Delete =
+            deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
+        if (!Delete) {
+          if (I->hasAvailableExternallyLinkage())
+            continue;
+          if (I->getName() == "llvm.global_ctors")
+            continue;
+        }
+
+        makeVisible(*I, Delete);
+
+        if (Delete) {
+          // Make this a declaration and drop its comdat.
+          I->setInitializer(nullptr);
+          I->setComdat(nullptr);
+        }
+      }
+
+      // Visit the Functions.
+      for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+        bool Delete =
+            deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
+        if (!Delete) {
+          if (I->hasAvailableExternallyLinkage())
+            continue;
+        }
+
+        makeVisible(*I, Delete);
+
+        if (Delete) {
+          // Make this a declaration and drop its comdat.
+          I->deleteBody();
+          I->setComdat(nullptr);
+        }
+      }
+
+      // Visit the Aliases.
+      for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+           I != E;) {
+        Module::alias_iterator CurI = I;
+        ++I;
+
+        bool Delete = deleteStuff == (bool)Named.count(&*CurI);
+        makeVisible(*CurI, Delete);
+
+        if (Delete) {
+          Type *Ty = CurI->getType()->getElementType();
+
+          CurI->removeFromParent();
+          llvm::Value *Declaration;
+          if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+            Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
+                                           CurI->getName(), &M);
+
+          } else {
+            Declaration =
+                new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+                                   nullptr, CurI->getName());
+
+          }
+          CurI->replaceAllUsesWith(Declaration);
+          delete &*CurI;
+        }
+      }
+
+      return true;
+    }
+  };
+
+  char GVExtractorPass::ID = 0;
+}
+
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs,
+                                         bool deleteFn) {
+  return new GVExtractorPass(GVs, deleteFn);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
new file mode 100644
index 0000000..816291d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -0,0 +1,121 @@
+//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "forceattrs"
+
+static cl::list<std::string>
+    ForceAttributes("force-attribute", cl::Hidden,
+                    cl::desc("Add an attribute to a function. This should be a "
+                             "pair of 'function-name:attribute-name', for "
+                             "example -force-attribute=foo:noinline. This "
+                             "option can be specified multiple times."));
+
+static Attribute::AttrKind parseAttrKind(StringRef Kind) {
+  return StringSwitch<Attribute::AttrKind>(Kind)
+      .Case("alwaysinline", Attribute::AlwaysInline)
+      .Case("builtin", Attribute::Builtin)
+      .Case("cold", Attribute::Cold)
+      .Case("convergent", Attribute::Convergent)
+      .Case("inlinehint", Attribute::InlineHint)
+      .Case("jumptable", Attribute::JumpTable)
+      .Case("minsize", Attribute::MinSize)
+      .Case("naked", Attribute::Naked)
+      .Case("nobuiltin", Attribute::NoBuiltin)
+      .Case("noduplicate", Attribute::NoDuplicate)
+      .Case("noimplicitfloat", Attribute::NoImplicitFloat)
+      .Case("noinline", Attribute::NoInline)
+      .Case("nonlazybind", Attribute::NonLazyBind)
+      .Case("noredzone", Attribute::NoRedZone)
+      .Case("noreturn", Attribute::NoReturn)
+      .Case("norecurse", Attribute::NoRecurse)
+      .Case("nounwind", Attribute::NoUnwind)
+      .Case("optnone", Attribute::OptimizeNone)
+      .Case("optsize", Attribute::OptimizeForSize)
+      .Case("readnone", Attribute::ReadNone)
+      .Case("readonly", Attribute::ReadOnly)
+      .Case("argmemonly", Attribute::ArgMemOnly)
+      .Case("returns_twice", Attribute::ReturnsTwice)
+      .Case("safestack", Attribute::SafeStack)
+      .Case("sanitize_address", Attribute::SanitizeAddress)
+      .Case("sanitize_memory", Attribute::SanitizeMemory)
+      .Case("sanitize_thread", Attribute::SanitizeThread)
+      .Case("ssp", Attribute::StackProtect)
+      .Case("sspreq", Attribute::StackProtectReq)
+      .Case("sspstrong", Attribute::StackProtectStrong)
+      .Case("uwtable", Attribute::UWTable)
+      .Default(Attribute::None);
+}
+
+/// If F has any forced attributes given on the command line, add them.
+static void addForcedAttributes(Function &F) {
+  for (auto &S : ForceAttributes) {
+    auto KV = StringRef(S).split(':');
+    if (KV.first != F.getName())
+      continue;
+
+    auto Kind = parseAttrKind(KV.second);
+    if (Kind == Attribute::None) {
+      DEBUG(dbgs() << "ForcedAttribute: " << KV.second
+                   << " unknown or not handled!\n");
+      continue;
+    }
+    if (F.hasFnAttribute(Kind))
+      continue;
+    F.addFnAttr(Kind);
+  }
+}
+
+PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) {
+  if (ForceAttributes.empty())
+    return PreservedAnalyses::all();
+
+  for (Function &F : M.functions())
+    addForcedAttributes(F);
+
+  // Just conservatively invalidate analyses; this isn't likely to be
+  // important.
+  return PreservedAnalyses::none();
+}
+
+namespace {
+struct ForceFunctionAttrsLegacyPass : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+  ForceFunctionAttrsLegacyPass() : ModulePass(ID) {
+    initializeForceFunctionAttrsLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override {
+    if (ForceAttributes.empty())
+      return false;
+
+    for (Function &F : M.functions())
+      addForcedAttributes(F);
+
+    // Conservatively assume we changed something.
+    return true;
+  }
+};
+}
+
+char ForceFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs",
+                "Force set function attributes", false, false)
+
+Pass *llvm::createForceFunctionAttrsLegacyPass() {
+  return new ForceFunctionAttrsLegacyPass();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
new file mode 100644
index 0000000..6dcfb3f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -0,0 +1,1058 @@
+//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, looking for functions which do not access or only read
+// non-local memory, and marking them readnone/readonly. It does the
+// same with function arguments independently, marking them readonly/
+// readnone/nocapture. Finally, well-known library call declarations
+// are marked with all attributes that are consistent with the
+// function's standard definition. This pass is implemented as a
+// bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "functionattrs"
+
+STATISTIC(NumReadNone, "Number of functions marked readnone");
+STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
+STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
+STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
+STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
+
+namespace {
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
+}
+
+namespace {
+struct FunctionAttrs : public CallGraphSCCPass {
+  static char ID; // Pass identification, replacement for typeid
+  FunctionAttrs() : CallGraphSCCPass(ID) {
+    initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnSCC(CallGraphSCC &SCC) override;
+  bool doInitialization(CallGraph &CG) override {
+    Revisit.clear();
+    return false;
+  }
+  bool doFinalization(CallGraph &CG) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>(); + CallGraphSCCPass::getAnalysisUsage(AU); + } + +private: + TargetLibraryInfo *TLI; + SmallVector<WeakVH,16> Revisit; +}; +} + +char FunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) + +Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } + +namespace { +/// The three kinds of memory access relevant to 'readonly' and +/// 'readnone' attributes. +enum MemoryAccessKind { + MAK_ReadNone = 0, + MAK_ReadOnly = 1, + MAK_MayWrite = 2 +}; +} + +static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR, + const SCCNodeSet &SCCNodes) { + FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); + if (MRB == FMRB_DoesNotAccessMemory) + // Already perfect! + return MAK_ReadNone; + + // Definitions with weak linkage may be overridden at linktime with + // something that writes memory, so treat them like declarations. + if (F.isDeclaration() || F.mayBeOverridden()) { + if (AliasAnalysis::onlyReadsMemory(MRB)) + return MAK_ReadOnly; + + // Conservatively assume it writes to memory. + return MAK_MayWrite; + } + + // Scan the function body for instructions that may read or write memory. + bool ReadsMemory = false; + for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { + Instruction *I = &*II; + + // Some instructions can be ignored even if they read or write memory. + // Detect these now, skipping to the next instruction if one is found. + CallSite CS(cast<Value>(I)); + if (CS) { + // Ignore calls to functions in the same SCC. + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + continue; + FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); + + // If the call doesn't access memory, we're done. + if (!(MRB & MRI_ModRef)) + continue; + + if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { + // The call could access any memory. If that includes writes, give up. + if (MRB & MRI_Mod) + return MAK_MayWrite; + // If it reads, note it. + if (MRB & MRI_Ref) + ReadsMemory = true; + continue; + } + + // Check whether all pointer arguments point to local memory, and + // ignore calls that only access local memory. + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI) { + Value *Arg = *CI; + if (!Arg->getType()->isPtrOrPtrVectorTy()) + continue; + + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); + + // Skip accesses to local or constant memory as they don't impact the + // externally visible mod/ref behavior. + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + + if (MRB & MRI_Mod) + // Writes non-local memory. Give up. + return MAK_MayWrite; + if (MRB & MRI_Ref) + // Ok, it reads non-local memory. + ReadsMemory = true; + } + continue; + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore non-volatile loads from local memory. (Atomic is okay here.) + if (!LI->isVolatile()) { + MemoryLocation Loc = MemoryLocation::get(LI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Ignore non-volatile stores to local memory. (Atomic is okay here.) 
+      if (!SI->isVolatile()) {
+        MemoryLocation Loc = MemoryLocation::get(SI);
+        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+          continue;
+      }
+    } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+      // Ignore vaargs on local memory.
+      MemoryLocation Loc = MemoryLocation::get(VI);
+      if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+        continue;
+    }
+
+    // Any remaining instructions need to be taken seriously! Check if they
+    // read or write memory.
+    if (I->mayWriteToMemory())
+      // Writes memory. Just give up.
+      return MAK_MayWrite;
+
+    // If this instruction may read memory, remember that.
+    ReadsMemory |= I->mayReadFromMemory();
+  }
+
+  return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
+}
+
+/// Deduce readonly/readnone attributes for the SCC.
+template <typename AARGetterT>
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+  // Check if any of the functions in the SCC read or write memory. If they
+  // write memory then they can't be marked readnone or readonly.
+  bool ReadsMemory = false;
+  for (Function *F : SCCNodes) {
+    // Call the callable parameter to look up AA results for this function.
+    AAResults &AAR = AARGetter(*F);
+
+    switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+    case MAK_MayWrite:
+      return false;
+    case MAK_ReadOnly:
+      ReadsMemory = true;
+      break;
+    case MAK_ReadNone:
+      // Nothing to do!
+      break;
+    }
+  }
+
+  // Success! Functions in this SCC do not access memory, or only read memory.
+  // Give them the appropriate attribute.
+  bool MadeChange = false;
+  for (Function *F : SCCNodes) {
+    if (F->doesNotAccessMemory())
+      // Already perfect!
+      continue;
+
+    if (F->onlyReadsMemory() && ReadsMemory)
+      // No change.
+      continue;
+
+    MadeChange = true;
+
+    // Clear out any existing attributes.
+    AttrBuilder B;
+    B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+    F->removeAttributes(
+        AttributeSet::FunctionIndex,
+        AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
+
+    // Add in the new attribute.
+    F->addAttribute(AttributeSet::FunctionIndex,
+                    ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+
+    if (ReadsMemory)
+      ++NumReadOnly;
+    else
+      ++NumReadNone;
+  }
+
+  return MadeChange;
+}
+
+namespace {
+/// For a given pointer Argument, this retains a list of Arguments of functions
+/// in the same SCC that the pointer data flows into. We use this to build an
+/// SCC of the arguments.
+struct ArgumentGraphNode {
+  Argument *Definition;
+  SmallVector<ArgumentGraphNode *, 4> Uses;
+};
+
+class ArgumentGraph {
+  // We store pointers to ArgumentGraphNode objects, so it's important that
+  // they not move around upon insert.
+  typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy;
+
+  ArgumentMapTy ArgumentMap;
+
+  // There is no root node for the argument graph; in fact,
+  //   void f(int *x, int *y) { if (...) f(x, y); }
+  // is an example where the graph is disconnected. The SCCIterator requires a
+  // single entry point, so we maintain a fake ("synthetic") root node that
+  // uses every node. Because the graph is directed and nothing points into
+  // the root, it will not participate in any SCCs (except for its own).
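+  //
+  // Editorial sketch (hypothetical usage, not from this patch): together with
+  // the GraphTraits specializations further below, this synthetic root lets
+  // the argument SCCs be enumerated with LLVM's scc_iterator, roughly:
+  //
+  //   ArgumentGraph AG;
+  //   // ... populate AG via AG[SomeArgument] ...
+  //   for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
+  //     const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
+  //     // process one strongly connected component of arguments
+  //   }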
+ ArgumentGraphNode SyntheticRoot; + +public: + ArgumentGraph() { SyntheticRoot.Definition = nullptr; } + + typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator; + + iterator begin() { return SyntheticRoot.Uses.begin(); } + iterator end() { return SyntheticRoot.Uses.end(); } + ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } + + ArgumentGraphNode *operator[](Argument *A) { + ArgumentGraphNode &Node = ArgumentMap[A]; + Node.Definition = A; + SyntheticRoot.Uses.push_back(&Node); + return &Node; + } +}; + +/// This tracker checks whether callees are in the SCC, and if so it does not +/// consider that a capture, instead adding it to the "Uses" list and +/// continuing with the analysis. +struct ArgumentUsesTracker : public CaptureTracker { + ArgumentUsesTracker(const SCCNodeSet &SCCNodes) + : Captured(false), SCCNodes(SCCNodes) {} + + void tooManyUses() override { Captured = true; } + + bool captured(const Use *U) override { + CallSite CS(U->getUser()); + if (!CS.getInstruction()) { + Captured = true; + return true; + } + + Function *F = CS.getCalledFunction(); + if (!F || F->isDeclaration() || F->mayBeOverridden() || + !SCCNodes.count(F)) { + Captured = true; + return true; + } + + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account for + // these. + + unsigned UseIndex = + std::distance(const_cast<const Use *>(CS.arg_begin()), U); + + assert(UseIndex < CS.data_operands_size() && + "Indirect function calls should have been filtered above!"); + + if (UseIndex >= CS.getNumArgOperands()) { + // Data operand, but not a argument operand -- must be a bundle operand + assert(CS.hasOperandBundles() && "Must be!"); + + // CaptureTracking told us that we're being captured by an operand bundle + // use. In this case it does not matter if the callee is within our SCC + // or not -- we've been captured in some unknown way, and we have to be + // conservative. + Captured = true; + return true; + } + + if (UseIndex >= F->arg_size()) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + Captured = true; + return true; + } + + Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); + return false; + } + + bool Captured; // True only if certainly captured (used outside our SCC). + SmallVector<Argument *, 4> Uses; // Uses within our SCC. + + const SCCNodeSet &SCCNodes; +}; +} + +namespace llvm { +template <> struct GraphTraits<ArgumentGraphNode *> { + typedef ArgumentGraphNode NodeType; + typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType; + + static inline NodeType *getEntryNode(NodeType *A) { return A; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->Uses.begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->Uses.end(); + } +}; +template <> +struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { + static NodeType *getEntryNode(ArgumentGraph *AG) { + return AG->getEntryNode(); + } + static ChildIteratorType nodes_begin(ArgumentGraph *AG) { + return AG->begin(); + } + static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } +}; +} + +/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. +static Attribute::AttrKind +determinePointerReadAttrs(Argument *A, + const SmallPtrSet<Argument *, 8> &SCCNodes) { + + SmallVector<Use *, 32> Worklist; + SmallSet<Use *, 32> Visited; + + // inalloca arguments are always clobbered by the call. 
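+  // (An inalloca argument is caller-allocated stack memory that the callee
+  // may freely write; for instance, in the MS ABI an object passed by value
+  // can be constructed and mutated in that slot, so no read-only conclusion
+  // is possible. Sketch only; the check below is the authoritative rule.)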
+ if (A->hasInAllocaAttr()) + return Attribute::None; + + bool IsRead = false; + // We don't need to track IsWritten. If A is written to, return immediately. + + for (Use &U : A->uses()) { + Visited.insert(&U); + Worklist.push_back(&U); + } + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast<Instruction>(U->getUser()); + + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + // The original value is not read/written via this if the new value isn't. + for (Use &UU : I->uses()) + if (Visited.insert(&UU).second) + Worklist.push_back(&UU); + break; + + case Instruction::Call: + case Instruction::Invoke: { + bool Captures = true; + + if (I->getType()->isVoidTy()) + Captures = false; + + auto AddUsersToWorklistIfCapturing = [&] { + if (Captures) + for (Use &UU : I->uses()) + if (Visited.insert(&UU).second) + Worklist.push_back(&UU); + }; + + CallSite CS(I); + if (CS.doesNotAccessMemory()) { + AddUsersToWorklistIfCapturing(); + continue; + } + + Function *F = CS.getCalledFunction(); + if (!F) { + if (CS.onlyReadsMemory()) { + IsRead = true; + AddUsersToWorklistIfCapturing(); + continue; + } + return Attribute::None; + } + + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account + // for these. + + unsigned UseIndex = std::distance(CS.arg_begin(), U); + + // U cannot be the callee operand use: since we're exploring the + // transitive uses of an Argument, having such a use be a callee would + // imply the CallSite is an indirect call or invoke; and we'd take the + // early exit above. + assert(UseIndex < CS.data_operands_size() && + "Data operand use expected!"); + + bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands(); + + if (UseIndex >= F->arg_size() && !IsOperandBundleUse) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + return Attribute::None; + } + + Captures &= !CS.doesNotCapture(UseIndex); + + // Since the optimizer (by design) cannot see the data flow corresponding + // to a operand bundle use, these cannot participate in the optimistic SCC + // analysis. Instead, we model the operand bundle uses as arguments in + // call to a function external to the SCC. + if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) || + IsOperandBundleUse) { + + // The accessors used on CallSite here do the right thing for calls and + // invokes with operand bundles. + + if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex)) + return Attribute::None; + if (!CS.doesNotAccessMemory(UseIndex)) + IsRead = true; + } + + AddUsersToWorklistIfCapturing(); + break; + } + + case Instruction::Load: + IsRead = true; + break; + + case Instruction::ICmp: + case Instruction::Ret: + break; + + default: + return Attribute::None; + } + } + + return IsRead ? Attribute::ReadOnly : Attribute::ReadNone; +} + +/// Deduce nocapture attributes for the SCC. +static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { + bool Changed = false; + + ArgumentGraph AG; + + AttrBuilder B; + B.addAttribute(Attribute::NoCapture); + + // Check each function in turn, determining which pointer arguments are not + // captured. + for (Function *F : SCCNodes) { + // Definitions with weak linkage may be overridden at linktime with + // something that captures pointers, so treat them like declarations. 
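+    // As a reminder, "capture" means a copy of the pointer may outlive the
+    // call. Hedged C sketch (names are illustrative):
+    //   int *G;
+    //   void f(int *P) { G = P; }      // captures: P escapes through G
+    //   int  g(int *P) { return *P; }  // no capture: P is only loaded from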
+ if (F->isDeclaration() || F->mayBeOverridden()) + continue; + + // Functions that are readonly (or readnone) and nounwind and don't return + // a value can't capture arguments. Don't analyze them. + if (F->onlyReadsMemory() && F->doesNotThrow() && + F->getReturnType()->isVoidTy()) { + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { + if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { + A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); + ++NumNoCapture; + Changed = true; + } + } + continue; + } + + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { + if (!A->getType()->isPointerTy()) + continue; + bool HasNonLocalUses = false; + if (!A->hasNoCaptureAttr()) { + ArgumentUsesTracker Tracker(SCCNodes); + PointerMayBeCaptured(&*A, &Tracker); + if (!Tracker.Captured) { + if (Tracker.Uses.empty()) { + // If it's trivially not captured, mark it nocapture now. + A->addAttr( + AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); + ++NumNoCapture; + Changed = true; + } else { + // If it's not trivially captured and not trivially not captured, + // then it must be calling into another function in our SCC. Save + // its particulars for Argument-SCC analysis later. + ArgumentGraphNode *Node = AG[&*A]; + for (SmallVectorImpl<Argument *>::iterator + UI = Tracker.Uses.begin(), + UE = Tracker.Uses.end(); + UI != UE; ++UI) { + Node->Uses.push_back(AG[*UI]); + if (*UI != A) + HasNonLocalUses = true; + } + } + } + // Otherwise, it's captured. Don't bother doing SCC analysis on it. + } + if (!HasNonLocalUses && !A->onlyReadsMemory()) { + // Can we determine that it's readonly/readnone without doing an SCC? + // Note that we don't allow any calls at all here, or else our result + // will be dependent on the iteration order through the functions in the + // SCC. + SmallPtrSet<Argument *, 8> Self; + Self.insert(&*A); + Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); + if (R != Attribute::None) { + AttrBuilder B; + B.addAttribute(R); + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); + Changed = true; + R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; + } + } + } + } + + // The graph we've collected is partial because we stopped scanning for + // argument uses once we solved the argument trivially. These partial nodes + // show up as ArgumentGraphNode objects with an empty Uses list, and for + // these nodes the final decision about whether they capture has already been + // made. If the definition doesn't have a 'nocapture' attribute by now, it + // captures. + + for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { + const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; + if (ArgumentSCC.size() == 1) { + if (!ArgumentSCC[0]->Definition) + continue; // synthetic root node + + // eg. "void f(int* x) { if (...) 
f(x); }" + if (ArgumentSCC[0]->Uses.size() == 1 && + ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { + Argument *A = ArgumentSCC[0]->Definition; + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); + ++NumNoCapture; + Changed = true; + } + continue; + } + + bool SCCCaptured = false; + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { + ArgumentGraphNode *Node = *I; + if (Node->Uses.empty()) { + if (!Node->Definition->hasNoCaptureAttr()) + SCCCaptured = true; + } + } + if (SCCCaptured) + continue; + + SmallPtrSet<Argument *, 8> ArgumentSCCNodes; + // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for + // quickly looking up whether a given Argument is in this ArgumentSCC. + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { + ArgumentSCCNodes.insert((*I)->Definition); + } + + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { + ArgumentGraphNode *N = *I; + for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(), + UE = N->Uses.end(); + UI != UE; ++UI) { + Argument *A = (*UI)->Definition; + if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) + continue; + SCCCaptured = true; + break; + } + } + if (SCCCaptured) + continue; + + for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { + Argument *A = ArgumentSCC[i]->Definition; + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); + ++NumNoCapture; + Changed = true; + } + + // We also want to compute readonly/readnone. With a small number of false + // negatives, we can assume that any pointer which is captured isn't going + // to be provably readonly or readnone, since by definition we can't + // analyze all uses of a captured pointer. + // + // The false negatives happen when the pointer is captured by a function + // that promises readonly/readnone behaviour on the pointer, then the + // pointer's lifetime ends before anything that writes to arbitrary memory. + // Also, a readonly/readnone pointer may be returned, but returning a + // pointer is capturing it. + + Attribute::AttrKind ReadAttr = Attribute::ReadNone; + for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { + Argument *A = ArgumentSCC[i]->Definition; + Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes); + if (K == Attribute::ReadNone) + continue; + if (K == Attribute::ReadOnly) { + ReadAttr = Attribute::ReadOnly; + continue; + } + ReadAttr = K; + break; + } + + if (ReadAttr != Attribute::None) { + AttrBuilder B, R; + B.addAttribute(ReadAttr); + R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); + for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { + Argument *A = ArgumentSCC[i]->Definition; + // Clear out existing readonly/readnone attributes + A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R)); + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); + ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; + Changed = true; + } + } + } + + return Changed; +} + +/// Tests whether a function is "malloc-like". +/// +/// A function is "malloc-like" if it returns either null or a pointer that +/// doesn't alias any other pointer visible to the caller. 
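+///
+/// Hedged C-level sketch (our own illustration, not from this file):
+///   int *f(int n) { return n > 0 ? (int *)malloc(n) : 0; }  // malloc-like
+///   int *g(void)  { static int x; return &x; }  // not: &x is visible across calls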
+static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { + SmallSetVector<Value *, 8> FlowsToReturn; + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) + if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); + + for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { + Value *RetVal = FlowsToReturn[i]; + + if (Constant *C = dyn_cast<Constant>(RetVal)) { + if (!C->isNullValue() && !isa<UndefValue>(C)) + return false; + + continue; + } + + if (isa<Argument>(RetVal)) + return false; + + if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) + switch (RVI->getOpcode()) { + // Extend the analysis by looking upwards. + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(RVI); + for (Value *IncValue : PN->incoming_values()) + FlowsToReturn.insert(IncValue); + continue; + } + + // Check whether the pointer came from an allocation. + case Instruction::Alloca: + break; + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + if (CS.paramHasAttr(0, Attribute::NoAlias)) + break; + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + break; + } // fall-through + default: + return false; // Did not come from an allocation. + } + + if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) + return false; + } + + return true; +} + +/// Deduce noalias attributes for the SCC. +static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { + // Check each function in turn, determining which functions return noalias + // pointers. + for (Function *F : SCCNodes) { + // Already noalias. + if (F->doesNotAlias(0)) + continue; + + // Definitions with weak linkage may be overridden at linktime, so + // treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + return false; + + // We annotate noalias return values, which are only applicable to + // pointer types. + if (!F->getReturnType()->isPointerTy()) + continue; + + if (!isFunctionMallocLike(F, SCCNodes)) + return false; + } + + bool MadeChange = false; + for (Function *F : SCCNodes) { + if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) + continue; + + F->setDoesNotAlias(0); + ++NumNoAlias; + MadeChange = true; + } + + return MadeChange; +} + +/// Tests whether this function is known to not return null. +/// +/// Requires that the function returns a pointer. +/// +/// Returns true if it believes the function will not return a null, and sets +/// \p Speculative based on whether the returned conclusion is a speculative +/// conclusion due to SCC calls. 
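+///
+/// Hedged sketch: for "int *f(int c) { return c ? f(c - 1) : &G; }" (G some
+/// global), the self-call is speculatively treated as nonnull and \p
+/// Speculative is set; the conclusion only sticks if the rest of the SCC
+/// agrees.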
+static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, + const TargetLibraryInfo &TLI, bool &Speculative) { + assert(F->getReturnType()->isPointerTy() && + "nonnull only meaningful on pointer types"); + Speculative = false; + + SmallSetVector<Value *, 8> FlowsToReturn; + for (BasicBlock &BB : *F) + if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); + + for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { + Value *RetVal = FlowsToReturn[i]; + + // If this value is locally known to be non-null, we're good + if (isKnownNonNull(RetVal, &TLI)) + continue; + + // Otherwise, we need to look upwards since we can't make any local + // conclusions. + Instruction *RVI = dyn_cast<Instruction>(RetVal); + if (!RVI) + return false; + switch (RVI->getOpcode()) { + // Extend the analysis by looking upwards. + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(RVI); + for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + FlowsToReturn.insert(PN->getIncomingValue(i)); + continue; + } + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + Function *Callee = CS.getCalledFunction(); + // A call to a node within the SCC is assumed to return null until + // proven otherwise + if (Callee && SCCNodes.count(Callee)) { + Speculative = true; + continue; + } + return false; + } + default: + return false; // Unknown source, may be null + }; + llvm_unreachable("should have either continued or returned"); + } + + return true; +} + +/// Deduce nonnull attributes for the SCC. +static bool addNonNullAttrs(const SCCNodeSet &SCCNodes, + const TargetLibraryInfo &TLI) { + // Speculative that all functions in the SCC return only nonnull + // pointers. We may refute this as we analyze functions. + bool SCCReturnsNonNull = true; + + bool MadeChange = false; + + // Check each function in turn, determining which functions return nonnull + // pointers. + for (Function *F : SCCNodes) { + // Already nonnull. + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::NonNull)) + continue; + + // Definitions with weak linkage may be overridden at linktime, so + // treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + return false; + + // We annotate nonnull return values, which are only applicable to + // pointer types. + if (!F->getReturnType()->isPointerTy()) + continue; + + bool Speculative = false; + if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) { + if (!Speculative) { + // Mark the function eagerly since we may discover a function + // which prevents us from speculating about the entire SCC + DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n"); + F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull); + ++NumNonNullReturn; + MadeChange = true; + } + continue; + } + // At least one function returns something which could be null, can't + // speculate any more. 
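+    // (For instance, a hypothetical "int *f(int c) { return c ? &G : 0; }"
+    // in the SCC refutes the speculation for all members, because its null
+    // result can flow through intra-SCC calls.)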
+    SCCReturnsNonNull = false;
+  }
+
+  if (SCCReturnsNonNull) {
+    for (Function *F : SCCNodes) {
+      if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+                                          Attribute::NonNull) ||
+          !F->getReturnType()->isPointerTy())
+        continue;
+
+      DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
+      F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+      ++NumNonNullReturn;
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
+static bool setDoesNotRecurse(Function &F) {
+  if (F.doesNotRecurse())
+    return false;
+  F.setDoesNotRecurse();
+  ++NumNoRecurse;
+  return true;
+}
+
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
+                              SmallVectorImpl<WeakVH> &Revisit) {
+  // Try to identify functions that do not recurse.
+
+  // If the SCC contains multiple nodes, we know for sure there is recursion.
+  if (!SCC.isSingular())
+    return false;
+
+  const CallGraphNode *CGN = *SCC.begin();
+  Function *F = CGN->getFunction();
+  if (!F || F->isDeclaration() || F->doesNotRecurse())
+    return false;
+
+  // If all of the calls in F are identifiable and are to norecurse functions,
+  // F is norecurse. This check also detects self-recursion: F is not currently
+  // marked norecurse, so any call from F to F fails the test below.
+  if (std::all_of(CGN->begin(), CGN->end(),
+                  [](const CallGraphNode::CallRecord &CR) {
+                    Function *F = CR.second->getFunction();
+                    return F && F->doesNotRecurse();
+                  }))
+    // Every callee is known not to recurse, so neither can F.
+    return setDoesNotRecurse(*F);
+
+  // We know that F is not obviously recursive, but we haven't been able to
+  // prove that it doesn't actually recurse. Add it to the Revisit list to try
+  // again top-down later.
+  Revisit.push_back(F);
+  return false;
+}
+
+static bool addNoRecurseAttrsTopDownOnly(Function *F) {
+  // If F is internal and all uses are in norecurse functions, then F is also
+  // norecurse.
+  if (F->doesNotRecurse())
+    return false;
+  if (F->hasInternalLinkage()) {
+    for (auto *U : F->users())
+      if (auto *I = dyn_cast<Instruction>(U)) {
+        if (!I->getParent()->getParent()->doesNotRecurse())
+          return false;
+      } else {
+        return false;
+      }
+    return setDoesNotRecurse(*F);
+  }
+  return false;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  bool Changed = false;
+
+  // We compute dedicated AA results for each function in the SCC as needed. We
+  // use a lambda referencing external objects so that they live long enough to
+  // be queried, but we re-use them each time.
+  Optional<BasicAAResult> BAR;
+  Optional<AAResults> AAR;
+  auto AARGetter = [&](Function &F) -> AAResults & {
+    BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+    AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+    return *AAR;
+  };
+
+  // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
+  // whether a given CallGraphNode is in this SCC. Also track whether there are
+  // any external or opt-none nodes that will prevent us from optimizing any
+  // part of the SCC.
+  SCCNodeSet SCCNodes;
+  bool ExternalNode = false;
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();
+    if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
+      // External node or a function we're choosing not to optimize - avoid
+      // both transforming it and leveraging any information it provides.
+ ExternalNode = true; + continue; + } + + SCCNodes.insert(F); + } + + Changed |= addReadAttrs(SCCNodes, AARGetter); + Changed |= addArgumentAttrs(SCCNodes); + + // If we have no external nodes participating in the SCC, we can deduce some + // more precise attributes as well. + if (!ExternalNode) { + Changed |= addNoAliasAttrs(SCCNodes); + Changed |= addNonNullAttrs(SCCNodes, *TLI); + } + + Changed |= addNoRecurseAttrs(SCC, Revisit); + return Changed; +} + +bool FunctionAttrs::doFinalization(CallGraph &CG) { + bool Changed = false; + // When iterating over SCCs we visit functions in a bottom-up fashion. Some of + // the rules we have for identifying norecurse functions work best with a + // top-down walk, so look again at all the functions we previously marked as + // worth revisiting, in top-down order. + for (auto &F : reverse(Revisit)) + if (F) + Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F)); + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp new file mode 100644 index 0000000..d8b677b --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -0,0 +1,433 @@ +//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Function import based on summaries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/FunctionImport.h" + +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/AutoUpgrade.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/SourceMgr.h" + +#include <map> + +using namespace llvm; + +#define DEBUG_TYPE "function-import" + +/// Limit on instruction count of imported functions. +static cl::opt<unsigned> ImportInstrLimit( + "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"), + cl::desc("Only import functions with less than N instructions")); + +// Load lazily a module from \p FileName in \p Context. +static std::unique_ptr<Module> loadFile(const std::string &FileName, + LLVMContext &Context) { + SMDiagnostic Err; + DEBUG(dbgs() << "Loading '" << FileName << "'\n"); + std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context); + if (!Result) { + Err.print("function-import", errs()); + return nullptr; + } + + Result->materializeMetadata(); + UpgradeDebugInfo(*Result); + + return Result; +} + +namespace { +/// Helper to load on demand a Module from file and cache it for subsequent +/// queries. It can be used with the FunctionImporter. +class ModuleLazyLoaderCache { + /// Cache of lazily loaded module for import. + StringMap<std::unique_ptr<Module>> ModuleMap; + + /// Retrieve a Module from the cache or lazily load it on demand. + std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule; + +public: + /// Create the loader, Module will be initialized in \p Context. 
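+  ///
+  /// Usage sketch (Ctx and the file name are hypothetical):
+  ///   ModuleLazyLoaderCache Loader(
+  ///       [&](StringRef File) { return loadFile(File, Ctx); });
+  ///   Module &M = Loader("foo.o");  // parsed lazily once, cached afterwards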
+ ModuleLazyLoaderCache(std::function< + std::unique_ptr<Module>(StringRef FileName)> createLazyModule) + : createLazyModule(createLazyModule) {} + + /// Retrieve a Module from the cache or lazily load it on demand. + Module &operator()(StringRef FileName); + + std::unique_ptr<Module> takeModule(StringRef FileName) { + auto I = ModuleMap.find(FileName); + assert(I != ModuleMap.end()); + std::unique_ptr<Module> Ret = std::move(I->second); + ModuleMap.erase(I); + return Ret; + } +}; + +// Get a Module for \p FileName from the cache, or load it lazily. +Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) { + auto &Module = ModuleMap[Identifier]; + if (!Module) + Module = createLazyModule(Identifier); + return *Module; +} +} // anonymous namespace + +/// Walk through the instructions in \p F looking for external +/// calls not already in the \p CalledFunctions set. If any are +/// found they are added to the \p Worklist for importing. +static void findExternalCalls(const Module &DestModule, Function &F, + const FunctionInfoIndex &Index, + StringSet<> &CalledFunctions, + SmallVector<StringRef, 64> &Worklist) { + // We need to suffix internal function calls imported from other modules, + // prepare the suffix ahead of time. + std::string Suffix; + if (F.getParent() != &DestModule) + Suffix = + (Twine(".llvm.") + + Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str(); + + for (auto &BB : F) { + for (auto &I : BB) { + if (isa<CallInst>(I)) { + auto CalledFunction = cast<CallInst>(I).getCalledFunction(); + // Insert any new external calls that have not already been + // added to set/worklist. + if (!CalledFunction || !CalledFunction->hasName()) + continue; + // Ignore intrinsics early + if (CalledFunction->isIntrinsic()) { + assert(CalledFunction->getIntrinsicID() != 0); + continue; + } + auto ImportedName = CalledFunction->getName(); + auto Renamed = (ImportedName + Suffix).str(); + // Rename internal functions + if (CalledFunction->hasInternalLinkage()) { + ImportedName = Renamed; + } + auto It = CalledFunctions.insert(ImportedName); + if (!It.second) { + // This is a call to a function we already considered, skip. + continue; + } + // Ignore functions already present in the destination module + auto *SrcGV = DestModule.getNamedValue(ImportedName); + if (SrcGV) { + assert(isa<Function>(SrcGV) && "Name collision during import"); + if (!cast<Function>(SrcGV)->isDeclaration()) { + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring " + << ImportedName << " already in DestinationModule\n"); + continue; + } + } + + Worklist.push_back(It.first->getKey()); + DEBUG(dbgs() << DestModule.getModuleIdentifier() + << ": Adding callee for : " << ImportedName << " : " + << F.getName() << "\n"); + } + } + } +} + +// Helper function: given a worklist and an index, will process all the worklist +// and decide what to import based on the summary information. +// +// Nothing is actually imported, functions are materialized in their source +// module and analyzed there. +// +// \p ModuleToFunctionsToImportMap is filled with the set of Function to import +// per Module. 
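+//
+// Note on naming (values are illustrative): an internal callee "bar" imported
+// from a module with id 42 is tracked here as "bar.llvm.42", matching the
+// Suffix renaming performed in findExternalCalls above.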
+static void GetImportList(Module &DestModule,
+                          SmallVector<StringRef, 64> &Worklist,
+                          StringSet<> &CalledFunctions,
+                          std::map<StringRef, DenseSet<const GlobalValue *>>
+                              &ModuleToFunctionsToImportMap,
+                          const FunctionInfoIndex &Index,
+                          ModuleLazyLoaderCache &ModuleLoaderCache) {
+  while (!Worklist.empty()) {
+    auto CalledFunctionName = Worklist.pop_back_val();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
+                 << CalledFunctionName << "\n");
+
+    // Try to get a summary for this function call.
+    auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
+    if (InfoList == Index.end()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
+                   << CalledFunctionName << ". Ignoring.\n");
+      continue;
+    }
+    assert(!InfoList->second.empty() && "No summary, error at import?");
+
+    // Comdat can have multiple entries, FIXME: what do we do with them?
+    auto &Info = InfoList->second[0];
+    assert(Info && "Nullptr in list, error importing summaries?\n");
+
+    auto *Summary = Info->functionSummary();
+    if (!Summary) {
+      // FIXME: in case we are lazyloading summaries, we can do it now.
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Missing summary for " << CalledFunctionName
+                   << ", error at import?\n");
+      llvm_unreachable("Missing summary");
+    }
+
+    if (Summary->instCount() > ImportInstrLimit) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
+                   << CalledFunctionName << " with " << Summary->instCount()
+                   << " instructions (limit " << ImportInstrLimit << ")\n");
+      continue;
+    }
+
+    // Get the module path from the summary.
+    auto ModuleIdentifier = Summary->modulePath();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
+                 << CalledFunctionName << " from " << ModuleIdentifier << "\n");
+
+    auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
+
+    // The function that we will import!
+    GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
+
+    if (!SGV) {
+      // The destination module refers to a function that was originally local
+      // to the source module by its renamed (suffixed) name. Our copy of the
+      // source module may not have been renamed, so strip the suffix that was
+      // added during renaming to recover the original name in the source
+      // module.
+      std::pair<StringRef, StringRef> Split =
+          CalledFunctionName.split(".llvm.");
+      SGV = SrcModule.getNamedValue(Split.first);
+      assert(SGV && "Can't find function to import in source module");
+    }
+    if (!SGV) {
+      report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
+                         "' in Module '" + SrcModule.getModuleIdentifier() +
+                         "', error in the summary?\n");
+    }
+
+    Function *F = dyn_cast<Function>(SGV);
+    if (!F && isa<GlobalAlias>(SGV)) {
+      auto *SGA = dyn_cast<GlobalAlias>(SGV);
+      F = dyn_cast<Function>(SGA->getBaseObject());
+      CalledFunctionName = F->getName();
+    }
+    assert(F && "Imported Function is ... not a Function");
+
+    // We cannot import weak_any functions/aliases without possibly affecting
+    // the order they are seen and selected by the linker, changing program
+    // semantics.
+    if (SGV->hasWeakAnyLinkage()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Ignoring import request for weak-any "
+                   << (isa<Function>(SGV) ?
"function " : "alias ") + << CalledFunctionName << " from " + << SrcModule.getModuleIdentifier() << "\n"); + continue; + } + + // Add the function to the import list + auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()]; + Entry.insert(F); + + // Process the newly imported functions and add callees to the worklist. + F->materialize(); + findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist); + } +} + +// Automatically import functions in Module \p DestModule based on the summaries +// index. +// +// The current implementation imports every called functions that exists in the +// summaries index. +bool FunctionImporter::importFunctions(Module &DestModule) { + DEBUG(dbgs() << "Starting import for Module " + << DestModule.getModuleIdentifier() << "\n"); + unsigned ImportedCount = 0; + + /// First step is collecting the called external functions. + StringSet<> CalledFunctions; + SmallVector<StringRef, 64> Worklist; + for (auto &F : DestModule) { + if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone)) + continue; + findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist); + } + if (Worklist.empty()) + return false; + + /// Second step: for every call to an external function, try to import it. + + // Linker that will be used for importing function + Linker TheLinker(DestModule); + + // Map of Module -> List of Function to import from the Module + std::map<StringRef, DenseSet<const GlobalValue *>> + ModuleToFunctionsToImportMap; + + // Analyze the summaries and get the list of functions to import by + // populating ModuleToFunctionsToImportMap + ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader); + GetImportList(DestModule, Worklist, CalledFunctions, + ModuleToFunctionsToImportMap, Index, ModuleLoaderCache); + assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList"); + + StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>> + ModuleToTempMDValsMap; + + // Do the actual import of functions now, one Module at a time + for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) { + // Get the module for the import + auto &FunctionsToImport = FunctionsToImportPerModule.second; + std::unique_ptr<Module> SrcModule = + ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first); + assert(&DestModule.getContext() == &SrcModule->getContext() && + "Context mismatch"); + + // Save the mapping of value ids to temporary metadata created when + // importing this function. If we have already imported from this module, + // add new temporary metadata to the existing mapping. + auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()]; + if (!TempMDVals) + TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>(); + + // Link in the specified functions. + if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None, + &Index, &FunctionsToImport, TempMDVals.get())) + report_fatal_error("Function Import: link error"); + + ImportedCount += FunctionsToImport.size(); + } + + // Now link in metadata for all modules from which we imported functions. + for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME : + ModuleToTempMDValsMap) { + // Load the specified source module. + auto &SrcModule = ModuleLoaderCache(SME.getKey()); + + // Link in all necessary metadata from this module. 
+ if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get())) + return false; + } + + DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module " + << DestModule.getModuleIdentifier() << "\n"); + return ImportedCount; +} + +/// Summary file to use for function importing when using -function-import from +/// the command line. +static cl::opt<std::string> + SummaryFile("summary-file", + cl::desc("The summary file to use for function importing.")); + +static void diagnosticHandler(const DiagnosticInfo &DI) { + raw_ostream &OS = errs(); + DiagnosticPrinterRawOStream DP(OS); + DI.print(DP); + OS << '\n'; +} + +/// Parse the function index out of an IR file and return the function +/// index object if found, or nullptr if not. +static std::unique_ptr<FunctionInfoIndex> +getFunctionIndexForFile(StringRef Path, std::string &Error, + DiagnosticHandlerFunction DiagnosticHandler) { + std::unique_ptr<MemoryBuffer> Buffer; + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(Path); + if (std::error_code EC = BufferOrErr.getError()) { + Error = EC.message(); + return nullptr; + } + Buffer = std::move(BufferOrErr.get()); + ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr = + object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(), + DiagnosticHandler); + if (std::error_code EC = ObjOrErr.getError()) { + Error = EC.message(); + return nullptr; + } + return (*ObjOrErr)->takeIndex(); +} + +namespace { +/// Pass that performs cross-module function import provided a summary file. +class FunctionImportPass : public ModulePass { + /// Optional function summary index to use for importing, otherwise + /// the summary-file option must be specified. + const FunctionInfoIndex *Index; + +public: + /// Pass identification, replacement for typeid + static char ID; + + /// Specify pass name for debug output + const char *getPassName() const override { + return "Function Importing"; + } + + explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr) + : ModulePass(ID), Index(Index) {} + + bool runOnModule(Module &M) override { + if (SummaryFile.empty() && !Index) + report_fatal_error("error: -function-import requires -summary-file or " + "file from frontend\n"); + std::unique_ptr<FunctionInfoIndex> IndexPtr; + if (!SummaryFile.empty()) { + if (Index) + report_fatal_error("error: -summary-file and index from frontend\n"); + std::string Error; + IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler); + if (!IndexPtr) { + errs() << "Error loading file '" << SummaryFile << "': " << Error + << "\n"; + return false; + } + Index = IndexPtr.get(); + } + + // Perform the import now. 
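+    // Hedged driver sketch using the names registered in this file (file
+    // names are placeholders):
+    //   opt -function-import -summary-file=index.thinlto.bc dest.bc -o out.bc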
+    auto ModuleLoader = [&M](StringRef Identifier) {
+      return loadFile(Identifier, M.getContext());
+    };
+    FunctionImporter Importer(*Index, ModuleLoader);
+    return Importer.importFunctions(M);
+  }
+};
+} // anonymous namespace
+
+char FunctionImportPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
+                      "Summary Based Function Import", false, false)
+INITIALIZE_PASS_END(FunctionImportPass, "function-import",
+                    "Summary Based Function Import", false, false)
+
+namespace llvm {
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+  return new FunctionImportPass(Index);
+}
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
new file mode 100644
index 0000000..9b276ed
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -0,0 +1,256 @@
+//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate unreachable internal globals from the
+// program. It uses an aggressive algorithm, searching out globals that are
+// known to be alive. After it finds all of the globals which are needed, it
+// deletes whatever is left over. This allows it to delete recursive chunks of
+// the program which are unreachable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Pass.h"
+#include <unordered_map>
+using namespace llvm;
+
+#define DEBUG_TYPE "globaldce"
+
+STATISTIC(NumAliases  , "Number of global aliases removed");
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+  struct GlobalDCE : public ModulePass {
+    static char ID; // Pass identification, replacement for typeid
+    GlobalDCE() : ModulePass(ID) {
+      initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+    }
+
+    // run - Do the GlobalDCE pass on the specified module, optionally updating
+    // the specified callgraph to reflect the changes.
+    //
+    bool runOnModule(Module &M) override;
+
+  private:
+    SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+    SmallPtrSet<Constant *, 8> SeenConstants;
+    std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+
+    /// GlobalIsNeeded - Mark the specific global value as needed, and
+    /// recursively mark anything that it uses as also needed.
+    void GlobalIsNeeded(GlobalValue *GV);
+    void MarkUsedGlobalsAsNeeded(Constant *C);
+
+    bool RemoveUnusedGlobalValue(GlobalValue &GV);
+  };
+}
+
+/// Returns true if F contains only a single "ret" instruction.
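+///
+/// (e.g. the IR for "void f() {}" is a single "ret void"; such ctors are
+/// pruned from llvm.global_ctors via the optimizeGlobalCtorsList call below.)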
+static bool isEmptyFunction(Function *F) { + BasicBlock &Entry = F->getEntryBlock(); + if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front())) + return false; + ReturnInst &RI = cast<ReturnInst>(Entry.front()); + return RI.getReturnValue() == nullptr; +} + +char GlobalDCE::ID = 0; +INITIALIZE_PASS(GlobalDCE, "globaldce", + "Dead Global Elimination", false, false) + +ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } + +bool GlobalDCE::runOnModule(Module &M) { + bool Changed = false; + + // Remove empty functions from the global ctors list. + Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + + // Collect the set of members for each comdat. + for (Function &F : M) + if (Comdat *C = F.getComdat()) + ComdatMembers.insert(std::make_pair(C, &F)); + for (GlobalVariable &GV : M.globals()) + if (Comdat *C = GV.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GV)); + for (GlobalAlias &GA : M.aliases()) + if (Comdat *C = GA.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GA)); + + // Loop over the module, adding globals which are obviously necessary. + for (Function &F : M) { + Changed |= RemoveUnusedGlobalValue(F); + // Functions with external linkage are needed if they have a body + if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage()) + if (!F.isDiscardableIfUnused()) + GlobalIsNeeded(&F); + } + + for (GlobalVariable &GV : M.globals()) { + Changed |= RemoveUnusedGlobalValue(GV); + // Externally visible & appending globals are needed, if they have an + // initializer. + if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage()) + if (!GV.isDiscardableIfUnused()) + GlobalIsNeeded(&GV); + } + + for (GlobalAlias &GA : M.aliases()) { + Changed |= RemoveUnusedGlobalValue(GA); + // Externally visible aliases are needed. + if (!GA.isDiscardableIfUnused()) + GlobalIsNeeded(&GA); + } + + // Now that all globals which are needed are in the AliveGlobals set, we loop + // through the program, deleting those which are not alive. + // + + // The first pass is to drop initializers of global variables which are dead. + std::vector<GlobalVariable *> DeadGlobalVars; // Keep track of dead globals + for (GlobalVariable &GV : M.globals()) + if (!AliveGlobals.count(&GV)) { + DeadGlobalVars.push_back(&GV); // Keep track of dead globals + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + GV.setInitializer(nullptr); + if (isSafeToDestroyConstant(Init)) + Init->destroyConstant(); + } + } + + // The second pass drops the bodies of functions which are dead... + std::vector<Function *> DeadFunctions; + for (Function &F : M) + if (!AliveGlobals.count(&F)) { + DeadFunctions.push_back(&F); // Keep track of dead globals + if (!F.isDeclaration()) + F.deleteBody(); + } + + // The third pass drops targets of aliases which are dead... + std::vector<GlobalAlias*> DeadAliases; + for (GlobalAlias &GA : M.aliases()) + if (!AliveGlobals.count(&GA)) { + DeadAliases.push_back(&GA); + GA.setAliasee(nullptr); + } + + if (!DeadFunctions.empty()) { + // Now that all interferences have been dropped, delete the actual objects + // themselves. + for (Function *F : DeadFunctions) { + RemoveUnusedGlobalValue(*F); + M.getFunctionList().erase(F); + } + NumFunctions += DeadFunctions.size(); + Changed = true; + } + + if (!DeadGlobalVars.empty()) { + for (GlobalVariable *GV : DeadGlobalVars) { + RemoveUnusedGlobalValue(*GV); + M.getGlobalList().erase(GV); + } + NumVariables += DeadGlobalVars.size(); + Changed = true; + } + + // Now delete any dead aliases. 
+  if (!DeadAliases.empty()) {
+    for (GlobalAlias *GA : DeadAliases) {
+      RemoveUnusedGlobalValue(*GA);
+      M.getAliasList().erase(GA);
+    }
+    NumAliases += DeadAliases.size();
+    Changed = true;
+  }
+
+  // Make sure that all memory is released
+  AliveGlobals.clear();
+  SeenConstants.clear();
+  ComdatMembers.clear();
+
+  return Changed;
+}
+
+/// GlobalIsNeeded - Mark the specific global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+  // If the global is already in the set, no need to reprocess it.
+  if (!AliveGlobals.insert(G).second)
+    return;
+
+  if (Comdat *C = G->getComdat()) {
+    for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+      GlobalIsNeeded(CM.second);
+  }
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+    // If this is a global variable, we must make sure to add any global values
+    // referenced by the initializer to the alive set.
+    if (GV->hasInitializer())
+      MarkUsedGlobalsAsNeeded(GV->getInitializer());
+  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+    // The target of a global alias is needed.
+    MarkUsedGlobalsAsNeeded(GA->getAliasee());
+  } else {
+    // Otherwise this must be a function object. We have to scan the body of
+    // the function looking for constants and global values which are used as
+    // operands. Any operands of these types must be processed to ensure that
+    // any globals used will be marked as needed.
+    Function *F = cast<Function>(G);
+
+    for (Use &U : F->operands())
+      MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
+
+    for (BasicBlock &BB : *F)
+      for (Instruction &I : BB)
+        for (Use &U : I.operands())
+          if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
+            GlobalIsNeeded(GV);
+          else if (Constant *C = dyn_cast<Constant>(U))
+            MarkUsedGlobalsAsNeeded(C);
+  }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    return GlobalIsNeeded(GV);
+
+  // Loop over all of the operands of the constant, adding any globals they
+  // use to the list of needed globals.
+  for (Use &U : C->operands()) {
+    // If we've already processed this constant there's no need to do it again.
+    Constant *Op = dyn_cast<Constant>(U);
+    if (Op && SeenConstants.insert(Op).second)
+      MarkUsedGlobalsAsNeeded(Op);
+  }
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+  if (GV.use_empty())
+    return false;
+  GV.removeDeadConstantUsers();
+  return GV.use_empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 0000000..fd77369
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,3234 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken.
If obviously true, it marks read/write globals as constant, deletes +// variables only stored to, etc. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include <algorithm> +#include <deque> +using namespace llvm; + +#define DEBUG_TYPE "globalopt" + +STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); +STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); +STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); +STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); +STATISTIC(NumDeleted , "Number of globals deleted"); +STATISTIC(NumGlobUses , "Number of global uses devirtualized"); +STATISTIC(NumLocalized , "Number of globals localized"); +STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); +STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); +STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); +STATISTIC(NumNestRemoved , "Number of nest attributes removed"); +STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); +STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); +STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); + +namespace { + struct GlobalOpt : public ModulePass { + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + } + static char ID; // Pass identification, replacement for typeid + GlobalOpt() : ModulePass(ID) { + initializeGlobalOptPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + private: + bool OptimizeFunctions(Module &M); + bool OptimizeGlobalVars(Module &M); + bool OptimizeGlobalAliases(Module &M); + bool deleteIfDead(GlobalValue &GV); + bool processGlobal(GlobalValue &GV); + bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS); + bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); + + bool isPointerValueDeadOnEntryToFunction(const Function *F, + GlobalValue *GV); + + TargetLibraryInfo *TLI; + SmallSet<const Comdat *, 8> NotDiscardableComdats; + }; +} + +char GlobalOpt::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
+                    "Global Variable Optimizer", false, false)
+
+ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
+
+/// Is this global variable possibly used by a leak checker as a root? If so,
+/// we might not really want to eliminate the stores to it.
+static bool isLeakCheckerRoot(GlobalVariable *GV) {
+  // A global variable is a root if it is a pointer, or could plausibly contain
+  // a pointer. There are two challenges; one is that we could have a struct
+  // that has an inner member which is a pointer. We recurse through the type
+  // to detect these (up to a point). The other is that we may actually be a
+  // union of a pointer and another type, and so our LLVM type is an integer
+  // which gets converted into a pointer, or our type is an [i8 x #] with a
+  // pointer potentially contained here.
+
+  if (GV->hasPrivateLinkage())
+    return false;
+
+  SmallVector<Type *, 4> Types;
+  Types.push_back(cast<PointerType>(GV->getType())->getElementType());
+
+  unsigned Limit = 20;
+  do {
+    Type *Ty = Types.pop_back_val();
+    switch (Ty->getTypeID()) {
+    default: break;
+    case Type::PointerTyID: return true;
+    case Type::ArrayTyID:
+    case Type::VectorTyID: {
+      SequentialType *STy = cast<SequentialType>(Ty);
+      Types.push_back(STy->getElementType());
+      break;
+    }
+    case Type::StructTyID: {
+      StructType *STy = cast<StructType>(Ty);
+      if (STy->isOpaque()) return true;
+      for (StructType::element_iterator I = STy->element_begin(),
+           E = STy->element_end(); I != E; ++I) {
+        Type *InnerTy = *I;
+        if (isa<PointerType>(InnerTy)) return true;
+        if (isa<CompositeType>(InnerTy))
+          Types.push_back(InnerTy);
+      }
+      break;
+    }
+    }
+    if (--Limit == 0) return true;
+  } while (!Types.empty());
+  return false;
+}
+
+/// Given a value that is stored to a global but never read, determine whether
+/// it's safe to remove the store and the chain of computation that feeds the
+/// store.
+static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
+  do {
+    if (isa<Constant>(V))
+      return true;
+    if (!V->hasOneUse())
+      return false;
+    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+        isa<GlobalValue>(V))
+      return false;
+    if (isAllocationFn(V, TLI))
+      return true;
+
+    Instruction *I = cast<Instruction>(V);
+    if (I->mayHaveSideEffects())
+      return false;
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+      if (!GEP->hasAllConstantIndices())
+        return false;
+    } else if (I->getNumOperands() != 1) {
+      return false;
+    }
+
+    V = I->getOperand(0);
+  } while (1);
+}
+
+/// This GV is a pointer root. Loop over all users of the global and clean up
+/// any that obviously don't assign the global a value that isn't dynamically
+/// allocated.
+static bool CleanupPointerRootUsers(GlobalVariable *GV,
+                                    const TargetLibraryInfo *TLI) {
+  // A brief explanation of leak checkers. The goal is to find bugs where
+  // pointers are forgotten, causing an accumulating growth in memory
+  // usage over time. The common strategy for leak checkers is to whitelist the
+  // memory pointed to by globals at exit. This is popular because it also
+  // solves another problem where the main thread of a C++ program may shut down
+  // before other threads that are still expecting to use those globals. To
+  // handle that case, we expect the program may create a singleton and never
+  // destroy it.
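+  //
+  // Hedged sketch: given "static int *G; void set(void) { G = malloc(4); }"
+  // with G never read, deleting just the store would make the allocation look
+  // leaked, so the code below removes the allocation chain together with the
+  // store (the Dead pairs).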
+ + bool Changed = false; + + // If Dead[n].first is the only use of a malloc result, we can delete its + // chain of computation and the store to the global in Dead[n].second. + SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; + + // Constants can't be pointers to dynamically allocated memory. + for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end(); + UI != E;) { + User *U = *UI++; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + Value *V = SI->getValueOperand(); + if (isa<Constant>(V)) { + Changed = true; + SI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(V)) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, SI)); + } + } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) { + if (isa<Constant>(MSI->getValue())) { + Changed = true; + MSI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, MSI)); + } + } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) { + GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); + if (MemSrc && MemSrc->isConstant()) { + Changed = true; + MTI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, MTI)); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (Constant *C = dyn_cast<Constant>(U)) { + if (isSafeToDestroyConstant(C)) { + C->destroyConstant(); + // This could have invalidated UI, start over from scratch. + Dead.clear(); + CleanupPointerRootUsers(GV, TLI); + return true; + } + } + } + + for (int i = 0, e = Dead.size(); i != e; ++i) { + if (IsSafeComputationToRemove(Dead[i].first, TLI)) { + Dead[i].second->eraseFromParent(); + Instruction *I = Dead[i].first; + do { + if (isAllocationFn(I, TLI)) + break; + Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); + if (!J) + break; + I->eraseFromParent(); + I = J; + } while (1); + I->eraseFromParent(); + } + } + + return Changed; +} + +/// We just marked GV constant. Loop over all users of the global, cleaning up +/// the obvious ones. This is largely just a quick scan over the use list to +/// clean up the easy and obvious cruft. This returns true if it made a change. +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, + const DataLayout &DL, + TargetLibraryInfo *TLI) { + bool Changed = false; + // Note that we need to use a weak value handle for the worklist items. When + // we delete a constant array, we may also be holding pointer to one of its + // elements (or an element of one of its elements if we're dealing with an + // array of arrays) in the worklist. + SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end()); + while (!WorkList.empty()) { + Value *UV = WorkList.pop_back_val(); + if (!UV) + continue; + + User *U = cast<User>(UV); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + if (Init) { + // Replace the load with the initializer. + LI->replaceAllUsesWith(Init); + LI->eraseFromParent(); + Changed = true; + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // Store must be unreachable or storing Init into the global. 
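+      // (The global has just been proven constant, so a surviving store can
+      // only rewrite the value already present or sit on a dynamically
+      // unreachable path; deleting it is safe either way.)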
+ SI->eraseFromParent(); + Changed = true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Constant *SubInit = nullptr; + if (Init) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); + } else if ((CE->getOpcode() == Instruction::BitCast && + CE->getType()->isPointerTy()) || + CE->getOpcode() == Instruction::AddrSpaceCast) { + // Pointer cast, delete any stores and memsets to the global. + Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); + } + + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + // Do not transform "gepinst (gep constexpr (GV))" here, because forming + // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold + // and will invalidate our notion of what Init is. + Constant *SubInit = nullptr; + if (!isa<ConstantExpr>(GEP->getOperand(0))) { + ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( + ConstantFoldInstruction(GEP, DL, TLI)); + if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + + // If the initializer is an all-null value and we have an inbounds GEP, + // we already know what the result of any load from that GEP is. + // TODO: Handle splats. + if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) + SubInit = Constant::getNullValue(GEP->getType()->getElementType()); + } + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI); + + if (GEP->use_empty()) { + GEP->eraseFromParent(); + Changed = true; + } + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv + if (MI->getRawDest() == V) { + MI->eraseFromParent(); + Changed = true; + } + + } else if (Constant *C = dyn_cast<Constant>(U)) { + // If we have a chain of dead constantexprs or other things dangling from + // us, and if they are all dead, nuke them without remorse. + if (isSafeToDestroyConstant(C)) { + C->destroyConstant(); + CleanupConstantGlobalUsers(V, Init, DL, TLI); + return true; + } + } + } + return Changed; +} + +/// Return true if the specified instruction is a safe user of a derived +/// expression from a global that we want to SROA. +static bool isSafeSROAElementUse(Value *V) { + // We might have a dead and dangling constant hanging off of here. + if (Constant *C = dyn_cast<Constant>(V)) + return isSafeToDestroyConstant(C); + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // Loads are ok. + if (isa<LoadInst>(I)) return true; + + // Stores *to* the pointer are ok. + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getOperand(0) != V; + + // Otherwise, it must be a GEP. + GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); + if (!GEPI) return false; + + if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || + !cast<Constant>(GEPI->getOperand(1))->isNullValue()) + return false; + + for (User *U : GEPI->users()) + if (!isSafeSROAElementUse(U)) + return false; + return true; +} + + +/// U is a direct user of the specified global value. Look at it and its uses +/// and decide whether it is safe to SROA this global. +static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { + // The user of the global must be a GEP Inst or a ConstantExpr GEP. 
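+  // For instance (hypothetical IR): with '@G = internal global {i32, [4 x i8]}',
+  // a use like 'gep @G, 0, 1, 2' is a candidate, while any use that lets @G
+  // itself escape (a call, a store of @G, ...) is not.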
+ if (!isa<GetElementPtrInst>(U) && + (!isa<ConstantExpr>(U) || + cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) + return false; + + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we + // don't like < 3 operand CE's, and we don't like non-constant integer + // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some + // value of C. + if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || + !cast<Constant>(U->getOperand(1))->isNullValue() || + !isa<ConstantInt>(U->getOperand(2))) + return false; + + gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); + ++GEPI; // Skip over the pointer index. + + // If this is a use of an array allocation, do a bit more checking for sanity. + if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { + uint64_t NumElements = AT->getNumElements(); + ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); + + // Check to make sure that index falls within the array. If not, + // something funny is going on, so we won't do the optimization. + // + if (Idx->getZExtValue() >= NumElements) + return false; + + // We cannot scalar repl this level of the array unless any array + // sub-indices are in-range constants. In particular, consider: + // A[0][i]. We cannot know that the user isn't doing invalid things like + // allowing i to index an out-of-range subscript that accesses A[1]. + // + // Scalar replacing *just* the outer index of the array is probably not + // going to be a win anyway, so just give up. + for (++GEPI; // Skip array index. + GEPI != E; + ++GEPI) { + uint64_t NumElements; + if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) + NumElements = SubArrayTy->getNumElements(); + else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) + NumElements = SubVectorTy->getNumElements(); + else { + assert((*GEPI)->isStructTy() && + "Indexed GEP type is not array, vector, or struct!"); + continue; + } + + ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); + if (!IdxVal || IdxVal->getZExtValue() >= NumElements) + return false; + } + } + + for (User *UU : U->users()) + if (!isSafeSROAElementUse(UU)) + return false; + + return true; +} + +/// Look at all uses of the global and decide whether it is safe for us to +/// perform this transformation. +static bool GlobalUsersSafeToSRA(GlobalValue *GV) { + for (User *U : GV->users()) + if (!IsUserOfGlobalSafeForSRA(U, GV)) + return false; + + return true; +} + + +/// Perform scalar replacement of aggregates on the specified global variable. +/// This opens the door for other optimizations by exposing the behavior of the +/// program in a more fine-grained way. We have determined that this +/// transformation is safe already. We return the first global variable we +/// insert so that the caller can reprocess it. +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { + // Make sure this global only has simple uses that we can SRA. + if (!GlobalUsersSafeToSRA(GV)) + return nullptr; + + assert(GV->hasLocalLinkage() && !GV->isConstant()); + Constant *Init = GV->getInitializer(); + Type *Ty = Init->getType(); + + std::vector<GlobalVariable*> NewGlobals; + Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); + + // Get the alignment of the global, either explicit or target-specific. 
+ unsigned StartAlignment = GV->getAlignment(); + if (StartAlignment == 0) + StartAlignment = DL.getABITypeAlignment(GV->getType()); + + if (StructType *STy = dyn_cast<StructType>(Ty)) { + NewGlobals.reserve(STy->getNumElements()); + const StructLayout &Layout = *DL.getStructLayout(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Constant *In = Init->getAggregateElement(i); + assert(In && "Couldn't get element of initializer?"); + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+Twine(i), + GV->getThreadLocalMode(), + GV->getType()->getAddressSpace()); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); + NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + uint64_t FieldOffset = Layout.getElementOffset(i); + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); + if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i))) + NGV->setAlignment(NewAlign); + } + } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { + unsigned NumElements = 0; + if (ArrayType *ATy = dyn_cast<ArrayType>(STy)) + NumElements = ATy->getNumElements(); + else + NumElements = cast<VectorType>(STy)->getNumElements(); + + if (NumElements > 16 && GV->hasNUsesOrMore(16)) + return nullptr; // It's not worth it. + NewGlobals.reserve(NumElements); + + uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType()); + unsigned EltAlign = DL.getABITypeAlignment(STy->getElementType()); + for (unsigned i = 0, e = NumElements; i != e; ++i) { + Constant *In = Init->getAggregateElement(i); + assert(In && "Couldn't get element of initializer?"); + + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+Twine(i), + GV->getThreadLocalMode(), + GV->getType()->getAddressSpace()); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); + NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); + if (NewAlign > EltAlign) + NGV->setAlignment(NewAlign); + } + } + + if (NewGlobals.empty()) + return nullptr; + + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); + + Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); + + // Loop over all of the uses of the global, replacing the constantexpr geps, + // with smaller constantexpr geps or direct references. + while (!GV->use_empty()) { + User *GEP = GV->user_back(); + assert(((isa<ConstantExpr>(GEP) && + cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| + isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); + + // Ignore the 1th operand, which has to be zero or else the program is quite + // broken (undefined). Get the 2nd operand, which is the structure or array + // index. + unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); + if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. + + Value *NewPtr = NewGlobals[Val]; + Type *NewTy = NewGlobals[Val]->getValueType(); + + // Form a shorter GEP if needed. 
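+    // (Sketch: 'gep @G, 0, Field, Rest...' becomes 'gep @G.Field, 0, Rest...'
+    //  on the split-out global for that element.)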
+ if (GEP->getNumOperands() > 3) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { + SmallVector<Constant*, 8> Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) + Idxs.push_back(CE->getOperand(i)); + NewPtr = + ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); + } else { + GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); + SmallVector<Value*, 8> Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) + Idxs.push_back(GEPI->getOperand(i)); + NewPtr = GetElementPtrInst::Create( + NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI); + } + } + GEP->replaceAllUsesWith(NewPtr); + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) + GEPI->eraseFromParent(); + else + cast<ConstantExpr>(GEP)->destroyConstant(); + } + + // Delete the old global, now that it is dead. + Globals.erase(GV); + ++NumSRA; + + // Loop over the new globals array deleting any globals that are obviously + // dead. This can arise due to scalarization of a structure or an array that + // has elements that are dead. + unsigned FirstGlobal = 0; + for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) + if (NewGlobals[i]->use_empty()) { + Globals.erase(NewGlobals[i]); + if (FirstGlobal == i) ++FirstGlobal; + } + + return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; +} + +/// Return true if all users of the specified value will trap if the value is +/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid +/// reprocessing them. +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSetImpl<const PHINode*> &PHIs) { + for (const User *U : V->users()) + if (isa<LoadInst>(U)) { + // Will trap. + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Storing the value. + } + } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { + if (CI->getCalledValue() != V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Not calling the ptr + } + } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { + if (II->getCalledValue() != V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Not calling the ptr + } + } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { + if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; + } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { + // If we've already seen this phi node, ignore it, it has already been + // checked. + if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) + return false; + } else if (isa<ICmpInst>(U) && + isa<ConstantPointerNull>(U->getOperand(1))) { + // Ignore icmp X, null + } else { + //cerr << "NONTRAPPING USE: " << *U; + return false; + } + + return true; +} + +/// Return true if all uses of any loads from GV will trap if the loaded value +/// is null. Note that this also permits comparisons of the loaded value +/// against null, as a special case. +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (const User *U : GV->users()) + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + SmallPtrSet<const PHINode*, 8> PHIs; + if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) + return false; + } else if (isa<StoreInst>(U)) { + // Ignore stores to the global. 
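+      // (A store only writes a new value into GV; it is not a use of the
+      // loaded value, so it cannot affect the will-trap analysis.)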
+    } else {
+      // We don't know or understand this user, bail out.
+      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
+      return false;
+    }
+  return true;
+}
+
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+  bool Changed = false;
+  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
+    Instruction *I = cast<Instruction>(*UI++);
+    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      LI->setOperand(0, NewV);
+      Changed = true;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+      if (SI->getOperand(1) == V) {
+        SI->setOperand(1, NewV);
+        Changed = true;
+      }
+    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+      CallSite CS(I);
+      if (CS.getCalledValue() == V) {
+        // Calling through the pointer!  Turn into a direct call, but be
+        // careful that the pointer is not also being passed as an argument.
+        CS.setCalledFunction(NewV);
+        Changed = true;
+        bool PassedAsArg = false;
+        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+          if (CS.getArgument(i) == V) {
+            PassedAsArg = true;
+            CS.setArgument(i, NewV);
+          }
+
+        if (PassedAsArg) {
+          // Being passed as an argument also.  Be careful to not invalidate
+          // UI!
+          UI = V->user_begin();
+        }
+      }
+    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
+                                ConstantExpr::getCast(CI->getOpcode(),
+                                                      NewV, CI->getType()));
+      if (CI->use_empty()) {
+        Changed = true;
+        CI->eraseFromParent();
+      }
+    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+      // Handle a GEP of the pointer: if all of its indices are constant,
+      // rewrite it as the equivalent constantexpr GEP of NewV; otherwise
+      // leave it alone.
+      SmallVector<Constant*, 8> Idxs;
+      Idxs.reserve(GEPI->getNumOperands()-1);
+      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+           i != e; ++i)
+        if (Constant *C = dyn_cast<Constant>(*i))
+          Idxs.push_back(C);
+        else
+          break;
+      if (Idxs.size() == GEPI->getNumOperands()-1)
+        Changed |= OptimizeAwayTrappingUsesOfValue(
+            GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
+      if (GEPI->use_empty()) {
+        Changed = true;
+        GEPI->eraseFromParent();
+      }
+    }
+  }
+
+  return Changed;
+}
+
+
+/// The specified global has only one non-null value stored into it.  If there
+/// are uses of the loaded value that would trap if the loaded value is
+/// dynamically null, then we know that they cannot be reachable with a null
+/// value, so we can optimize away the load entirely.
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
+                                            const DataLayout &DL,
+                                            TargetLibraryInfo *TLI) {
+  bool Changed = false;
+
+  // Keep track of whether we are able to remove all the uses of the global
+  // other than the store that defines it.
+  bool AllNonStoreUsesGone = true;
+
+  // Replace all uses of loads with uses of the stored value.
+  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end();
+       GUI != E;) {
+    User *GlobalUser = *GUI++;
+    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+      // If we were able to delete all uses of the load, it is dead; erase it.
+      if (LI->use_empty()) {
+        LI->eraseFromParent();
+        Changed = true;
+      } else {
+        AllNonStoreUsesGone = false;
+      }
+    } else if (isa<StoreInst>(GlobalUser)) {
+      // Ignore the store that stores "LV" to the global.
+      assert(GlobalUser->getOperand(1) == GV &&
+             "Must be storing *to* the global");
+    } else {
+      AllNonStoreUsesGone = false;
+
+      // If we get here we could have other crazy uses that are transitively
+      // loaded.
+      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
+              isa<BitCastInst>(GlobalUser) ||
+              isa<GetElementPtrInst>(GlobalUser)) &&
+             "Only expect load and stores!");
+    }
+  }
+
+  if (Changed) {
+    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
+                 << "\n");
+    ++NumGlobUses;
+  }
+
+  // If we nuked all of the loads, then none of the stores are needed either,
+  // nor is the global.
+  if (AllNonStoreUsesGone) {
+    if (isLeakCheckerRoot(GV)) {
+      Changed |= CleanupPointerRootUsers(GV, TLI);
+    } else {
+      Changed = true;
+      CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
+    }
+    if (GV->use_empty()) {
+      DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
+      Changed = true;
+      GV->eraseFromParent();
+      ++NumDeleted;
+    }
+  }
+  return Changed;
+}
+
+/// Walk the use list of V, constant folding all of the instructions that are
+/// foldable.
+static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
+                                TargetLibraryInfo *TLI) {
+  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
+    if (Instruction *I = dyn_cast<Instruction>(*UI++))
+      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
+        I->replaceAllUsesWith(NewC);
+
+        // Advance UI to the next non-I use to avoid invalidating it!
+        // Instructions could multiply use V.
+        while (UI != E && *UI == I)
+          ++UI;
+        I->eraseFromParent();
+      }
+}
+
+/// This function takes the specified global variable, and transforms the
+/// program as if it always contained the result of the specified malloc.
+/// Because it is always the result of the specified malloc, there is no reason
+/// to actually DO the malloc.  Instead, turn the malloc into a global, and
+/// rewrite any loads of GV as uses of the new global.
+static GlobalVariable *
+OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
+                              ConstantInt *NElements, const DataLayout &DL,
+                              TargetLibraryInfo *TLI) {
+  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
+
+  Type *GlobalType;
+  if (NElements->getZExtValue() == 1)
+    GlobalType = AllocTy;
+  else
+    // If we have an array allocation, the global variable is of an array.
+    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
+
+  // Create the new global variable.  The contents of the malloc'd memory are
+  // undefined, so initialize with an undef value.
+  GlobalVariable *NewGV = new GlobalVariable(
+      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
+      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
+      GV->getThreadLocalMode());
+
+  // If there are bitcast users of the malloc (which is typical, usually we have
+  // a malloc + bitcast) then replace them with uses of the new global.  Update
+  // other users to use the global as well.
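+  //
+  // Sketch of the overall rewrite (hypothetical IR): given
+  //   %m = call i8* @malloc(i64 4)
+  //   %p = bitcast i8* %m to i32*
+  //   store i32* %p, i32** @G
+  // we create '@G.body = internal global i32 undef' and redirect the
+  // bitcast's users (and, below, all loads of @G) to @G.body instead.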
+ BitCastInst *TheBC = nullptr; + while (!CI->use_empty()) { + Instruction *User = cast<Instruction>(CI->user_back()); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { + if (BCI->getType() == NewGV->getType()) { + BCI->replaceAllUsesWith(NewGV); + BCI->eraseFromParent(); + } else { + BCI->setOperand(0, NewGV); + } + } else { + if (!TheBC) + TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); + User->replaceUsesOfWith(CI, TheBC); + } + } + + Constant *RepValue = NewGV; + if (NewGV->getType() != GV->getType()->getElementType()) + RepValue = ConstantExpr::getBitCast(RepValue, + GV->getType()->getElementType()); + + // If there is a comparison against null, we will insert a global bool to + // keep track of whether the global was initialized yet or not. + GlobalVariable *InitBool = + new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), + GV->getName()+".init", GV->getThreadLocalMode()); + bool InitBoolUsed = false; + + // Loop over all uses of GV, processing them in turn. + while (!GV->use_empty()) { + if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { + // The global is initialized when the store to it occurs. + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, + SI->getOrdering(), SI->getSynchScope(), SI); + SI->eraseFromParent(); + continue; + } + + LoadInst *LI = cast<LoadInst>(GV->user_back()); + while (!LI->use_empty()) { + Use &LoadUse = *LI->use_begin(); + ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); + if (!ICI) { + LoadUse = RepValue; + continue; + } + + // Replace the cmp X, 0 with a use of the bool value. + // Sink the load to where the compare was, if atomic rules allow us to. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0, + LI->getOrdering(), LI->getSynchScope(), + LI->isUnordered() ? (Instruction*)ICI : LI); + InitBoolUsed = true; + switch (ICI->getPredicate()) { + default: llvm_unreachable("Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: // X < null -> always false + LV = ConstantInt::getFalse(GV->getContext()); + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", ICI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. + } + ICI->replaceAllUsesWith(LV); + ICI->eraseFromParent(); + } + LI->eraseFromParent(); + } + + // If the initialization boolean was used, insert it, otherwise delete it. + if (!InitBoolUsed) { + while (!InitBool->use_empty()) // Delete initializations + cast<StoreInst>(InitBool->user_back())->eraseFromParent(); + delete InitBool; + } else + GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); + + // Now the GV is dead, nuke it and the malloc.. + GV->eraseFromParent(); + CI->eraseFromParent(); + + // To further other optimizations, loop over all users of NewGV and try to + // constant prop them. This will promote GEP instructions with constant + // indices into GEP constant-exprs, which will allow global-opt to hack on it. + ConstantPropUsersOf(NewGV, DL, TLI); + if (RepValue != NewGV) + ConstantPropUsersOf(RepValue, DL, TLI); + + return NewGV; +} + +/// Scan the use-list of V checking to make sure that there are no complex uses +/// of V. 
We permit simple things like dereferencing the pointer, but not +/// storing through the address, unless it is to the specified global. +static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSetImpl<const PHINode*> &PHIs) { + for (const User *U : V->users()) { + const Instruction *Inst = cast<Instruction>(U); + + if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { + continue; // Fine, ignore. + } + + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->getOperand(0) == V && SI->getOperand(1) != GV) + return false; // Storing the pointer itself... bad. + continue; // Otherwise, storing through it, or storing into GV... fine. + } + + // Must index into the array and into the struct. + if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) + return false; + continue; + } + + if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { + // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI + // cycles. + if (PHIs.insert(PN).second) + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) + return false; + continue; + } + + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) + return false; + continue; + } + + return false; + } + return true; +} + +/// The Alloc pointer is stored into GV somewhere. Transform all uses of the +/// allocation into loads from the global and uses of the resultant pointer. +/// Further, delete the store into GV. This assumes that these value pass the +/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, + GlobalVariable *GV) { + while (!Alloc->use_empty()) { + Instruction *U = cast<Instruction>(*Alloc->user_begin()); + Instruction *InsertPt = U; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // If this is the store of the allocation into the global, remove it. + if (SI->getOperand(1) == GV) { + SI->eraseFromParent(); + continue; + } + } else if (PHINode *PN = dyn_cast<PHINode>(U)) { + // Insert the load in the corresponding predecessor, not right before the + // PHI. + InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator(); + } else if (isa<BitCastInst>(U)) { + // Must be bitcast between the malloc and store to initialize the global. + ReplaceUsesOfMallocWithGlobal(U, GV); + U->eraseFromParent(); + continue; + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + // If this is a "GEP bitcast" and the user is a store to the global, then + // just process it as a bitcast. + if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) + if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back())) + if (SI->getOperand(1) == GV) { + // Must be bitcast GEP between the malloc and store to initialize + // the global. + ReplaceUsesOfMallocWithGlobal(GEPI, GV); + GEPI->eraseFromParent(); + continue; + } + } + + // Insert a load from the global, and use it instead of the malloc. + Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); + U->replaceUsesOfWith(Alloc, NL); + } +} + +/// Verify that all uses of V (a load, or a phi of a load) are simple enough to +/// perform heap SRA on. This permits GEP's that index through the array and +/// struct field, icmps of null, and PHIs. 
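+/// For example (illustrative): after loading a struct pointer %s from the
+/// global, uses such as 'icmp eq %s, null' or 'gep %s, %i, 1' are simple
+/// enough, while passing %s to a call would defeat the transformation.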
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
+                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
+                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
+  // We permit two users of the load: setcc comparing against the null
+  // pointer, and a getelementptr of a specific form.
+  for (const User *U : V->users()) {
+    const Instruction *UI = cast<Instruction>(U);
+
+    // Comparison against null is ok.
+    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
+      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+        return false;
+      continue;
+    }
+
+    // getelementptr is also ok, but only a simple form.
+    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
+      // Must index into the array and into the struct.
+      if (GEPI->getNumOperands() < 3)
+        return false;
+
+      // Otherwise the GEP is ok.
+      continue;
+    }
+
+    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
+      if (!LoadUsingPHIsPerLoad.insert(PN).second)
+        // This means some phi nodes are dependent on each other.
+        // Avoid infinite looping!
+        return false;
+      if (!LoadUsingPHIs.insert(PN).second)
+        // If we have already analyzed this PHI, then it is safe.
+        continue;
+
+      // Make sure all uses of the PHI are simple enough to transform.
+      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
+                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
+        return false;
+
+      continue;
+    }
+
+    // Otherwise we don't know what this is, not ok.
+    return false;
+  }
+
+  return true;
+}
+
+
+/// If all users of values loaded from GV are simple enough to perform HeapSRA,
+/// return true.
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
+                                                    Instruction *StoredVal) {
+  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
+  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
+  for (const User *U : GV->users())
+    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
+                                          LoadUsingPHIsPerLoad))
+        return false;
+      LoadUsingPHIsPerLoad.clear();
+    }
+
+  // If we reach here, we know that all uses of the loads and transitive uses
+  // (through PHI nodes) are simple enough to transform.  However, we don't
+  // know that all inputs to the PHI nodes are in the same equivalence sets.
+  // Check to verify that all operands of the PHIs are either PHIs that can be
+  // transformed, loads from GV, or the stored value (StoredVal) itself.
+  for (const PHINode *PN : LoadUsingPHIs) {
+    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+      Value *InVal = PN->getIncomingValue(op);
+
+      // PHI of the stored value itself is ok.
+      if (InVal == StoredVal) continue;
+
+      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+        // One of the PHIs in our set is (optimistically) ok.
+        if (LoadUsingPHIs.count(InPN))
+          continue;
+        return false;
+      }
+
+      // Load from GV is ok.
+      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
+        if (LI->getOperand(0) == GV)
+          continue;
+
+      // TODO: undef and null incoming values could also be handled here.
+
+      // Anything else is rejected.
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+  if (FieldNo >= FieldVals.size())
+    FieldVals.resize(FieldNo+1);
+
+  // If we already have this value, just reuse the previously scalarized
+  // version.
+  if (Value *FieldVal = FieldVals[FieldNo])
+    return FieldVal;
+
+  // Depending on what instruction this is, we have several cases.
+ Value *Result; + if (LoadInst *LI = dyn_cast<LoadInst>(V)) { + // This is a scalarized version of the load from the global. Just create + // a new Load of the scalarized global. + Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, + InsertedScalarizedValues, + PHIsToRewrite), + LI->getName()+".f"+Twine(FieldNo), LI); + } else { + PHINode *PN = cast<PHINode>(V); + // PN's type is pointer to struct. Make a new PHI of pointer to struct + // field. + + PointerType *PTy = cast<PointerType>(PN->getType()); + StructType *ST = cast<StructType>(PTy->getElementType()); + + unsigned AS = PTy->getAddressSpace(); + PHINode *NewPN = + PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS), + PN->getNumIncomingValues(), + PN->getName()+".f"+Twine(FieldNo), PN); + Result = NewPN; + PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); + } + + return FieldVals[FieldNo] = Result; +} + +/// Given a load instruction and a value derived from the load, rewrite the +/// derived value to use the HeapSRoA'd load. +static void RewriteHeapSROALoadUser(Instruction *LoadUser, + DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + // If this is a comparison against null, handle it. + if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { + assert(isa<ConstantPointerNull>(SCI->getOperand(1))); + // If we have a setcc of the loaded pointer, we can use a setcc of any + // field. + Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, + InsertedScalarizedValues, PHIsToRewrite); + + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, + Constant::getNullValue(NPtr->getType()), + SCI->getName()); + SCI->replaceAllUsesWith(New); + SCI->eraseFromParent(); + return; + } + + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { + assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) + && "Unexpected GEPI!"); + + // Load the pointer for this field. + unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); + Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, + InsertedScalarizedValues, PHIsToRewrite); + + // Create the new GEP idx vector. + SmallVector<Value*, 8> GEPIdx; + GEPIdx.push_back(GEPI->getOperand(1)); + GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); + + Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx, + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NGEPI); + GEPI->eraseFromParent(); + return; + } + + // Recursively transform the users of PHI nodes. This will lazily create the + // PHIs that are needed for individual elements. Keep track of what PHIs we + // see in InsertedScalarizedValues so that we don't get infinite loops (very + // antisocial). If the PHI is already in InsertedScalarizedValues, it has + // already been seen first by another load, so its uses have already been + // processed. + PHINode *PN = cast<PHINode>(LoadUser); + if (!InsertedScalarizedValues.insert(std::make_pair(PN, + std::vector<Value*>())).second) + return; + + // If this is the first time we've seen this PHI, recursively process all + // users. + for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + } +} + +/// We are performing Heap SRoA on a global. Ptr is a value loaded from the +/// global. 
Eliminate all uses of Ptr, making them use FieldGlobals instead. +/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, + DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + } + + if (Load->use_empty()) { + Load->eraseFromParent(); + InsertedScalarizedValues.erase(Load); + } +} + +/// CI is an allocation of an array of structures. Break it up into multiple +/// allocations of arrays of the fields. +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, + Value *NElems, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); + Type *MAT = getMallocAllocatedType(CI, TLI); + StructType *STy = cast<StructType>(MAT); + + // There is guaranteed to be at least one use of the malloc (storing + // it into GV). If there are other uses, change them to be uses of + // the global to simplify later code. This also deletes the store + // into GV. + ReplaceUsesOfMallocWithGlobal(CI, GV); + + // Okay, at this point, there are no users of the malloc. Insert N + // new mallocs at the same place as CI, and N globals. + std::vector<Value*> FieldGlobals; + std::vector<Value*> FieldMallocs; + + unsigned AS = GV->getType()->getPointerAddressSpace(); + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ + Type *FieldTy = STy->getElementType(FieldNo); + PointerType *PFieldTy = PointerType::get(FieldTy, AS); + + GlobalVariable *NGV = new GlobalVariable( + *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), + nullptr, GV->getThreadLocalMode()); + FieldGlobals.push_back(NGV); + + unsigned TypeSize = DL.getTypeAllocSize(FieldTy); + if (StructType *ST = dyn_cast<StructType>(FieldTy)) + TypeSize = DL.getStructLayout(ST)->getSizeInBytes(); + Type *IntPtrTy = DL.getIntPtrType(CI->getType()); + Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, + ConstantInt::get(IntPtrTy, TypeSize), + NElems, nullptr, + CI->getName() + ".f" + Twine(FieldNo)); + FieldMallocs.push_back(NMI); + new StoreInst(NMI, NGV, CI); + } + + // The tricky aspect of this transformation is handling the case when malloc + // fails. In the original code, malloc failing would set the result pointer + // of malloc to null. In this case, some mallocs could succeed and others + // could fail. As such, we emit code that looks like this: + // F0 = malloc(field0) + // F1 = malloc(field1) + // F2 = malloc(field2) + // if (F0 == 0 || F1 == 0 || F2 == 0) { + // if (F0) { free(F0); F0 = 0; } + // if (F1) { free(F1); F1 = 0; } + // if (F2) { free(F2); F2 = 0; } + // } + // The malloc can also fail if its argument is too large. 
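+  // (The 'isneg' compare created below folds that case in: a signed-negative
+  // element count is treated up front as a failed allocation.)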
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), + ConstantZero, "isneg"); + for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { + Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); + } + + // Split the basic block at the old malloc. + BasicBlock *OrigBB = CI->getParent(); + BasicBlock *ContBB = + OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont"); + + // Create the block to check the first condition. Put all these blocks at the + // end of the function as they are unlikely to be executed. + BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), + "malloc_ret_null", + OrigBB->getParent()); + + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond + // branch on RunningOr. + OrigBB->getTerminator()->eraseFromParent(); + BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); + + // Within the NullPtrBlock, we need to emit a comparison and branch for each + // pointer, because some may be null while others are not. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Constant::getNullValue(GVVal->getType())); + BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", + OrigBB->getParent()); + BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", + OrigBB->getParent()); + Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, + Cmp, NullPtrBlock); + + // Fill in FreeBlock. + CallInst::CreateFree(GVVal, BI); + new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], + FreeBlock); + BranchInst::Create(NextBlock, FreeBlock); + + NullPtrBlock = NextBlock; + } + + BranchInst::Create(ContBB, NullPtrBlock); + + // CI is no longer needed, remove it. + CI->eraseFromParent(); + + /// As we process loads, if we can't immediately update all uses of the load, + /// keep track of what scalarized loads are inserted for a given load. + DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; + InsertedScalarizedValues[GV] = FieldGlobals; + + std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; + + // Okay, the malloc site is completely handled. All of the uses of GV are now + // loads, and all uses of those loads are simple. Rewrite them to use loads + // of the per-field globals instead. + for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); + continue; + } + + // Must be a store of null. + StoreInst *SI = cast<StoreInst>(User); + assert(isa<ConstantPointerNull>(SI->getOperand(0)) && + "Unexpected heap-sra user!"); + + // Insert a store of null into each global. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); + Constant *Null = Constant::getNullValue(PT->getElementType()); + new StoreInst(Null, FieldGlobals[i], SI); + } + // Erase the original store. + SI->eraseFromParent(); + } + + // While we have PHIs that are interesting to rewrite, do it. 
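+  // (This is a worklist loop: GetHeapSROAValue may create new field PHIs and
+  // push them onto PHIsToRewrite while existing entries are processed.)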
+  while (!PHIsToRewrite.empty()) {
+    PHINode *PN = PHIsToRewrite.back().first;
+    unsigned FieldNo = PHIsToRewrite.back().second;
+    PHIsToRewrite.pop_back();
+    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+    assert(FieldPN->getNumIncomingValues() == 0 &&
+           "Already processed this phi");
+
+    // Add all the incoming values.  This can materialize more phis.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *InVal = PN->getIncomingValue(i);
+      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+                               PHIsToRewrite);
+      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+    }
+  }
+
+  // Drop all inter-phi links and any loads that made it this far.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->dropAllReferences();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->dropAllReferences();
+  }
+
+  // Delete all the phis and loads now that inter-references are dead.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->eraseFromParent();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->eraseFromParent();
+  }
+
+  // The old global is now dead, remove it.
+  GV->eraseFromParent();
+
+  ++NumHeapSRA;
+  return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// This function is called when we see a pointer global variable with a single
+/// value stored into it that is a malloc or cast of malloc.
+static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
+                                               Type *AllocTy,
+                                               AtomicOrdering Ordering,
+                                               const DataLayout &DL,
+                                               TargetLibraryInfo *TLI) {
+  // If this is a malloc of an abstract type, don't touch it.
+  if (!AllocTy->isSized())
+    return false;
+
+  // We can't optimize this global unless all uses of it are *known* to be
+  // of the malloc value, not of the null initializer value (consider a use
+  // that compares the global's value against zero to see if the malloc has
+  // been reached).  To do this, we check to see if all uses of the global
+  // would trap if the global were null: this proves that they must all
+  // happen after the malloc.
+  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+    return false;
+
+  // We can't optimize this if the malloc itself is used in a complex way,
+  // for example, being stored into multiple globals.  This allows the
+  // malloc to be stored into the specified global, loaded, icmp'd, and
+  // GEP'd.  All of those uses can be rewritten to go through the global
+  // instead.
+  SmallPtrSet<const PHINode*, 8> PHIs;
+  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+    return false;
+
+  // If we have a global that is only initialized with a fixed size malloc,
+  // transform the program to use global memory instead of malloc'd memory.
+  // This eliminates dynamic allocation, avoids an indirection accessing the
+  // data, and exposes the resultant global to further GlobalOpt.
+  // We cannot optimize the malloc if we cannot determine the malloc array
+  // size.
+  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
+  if (!NElems)
+    return false;
+
+  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+    // Restrict this transformation to only working on small allocations
+    // (2048 bytes currently), as we don't want to introduce a 16M global or
+    // something.
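+    // For example (sketch): 'malloc(100 * sizeof(int))' stored once into an
+    // i32* global becomes '@G.body = internal global [100 x i32] undef', plus
+    // an i1 '@G.init' flag only if the pointer is ever compared against null.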
+    if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
+      OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
+      return true;
+    }
+
+  // If the allocation is an array of structures, consider transforming this
+  // into multiple malloc'd arrays, one for each field.  This is basically
+  // SRoA for malloc'd memory.
+
+  if (Ordering != NotAtomic)
+    return false;
+
+  // If this is an allocation of a fixed size array of structs, analyze as a
+  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
+  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+      AllocTy = AT->getElementType();
+
+  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+  if (!AllocSTy)
+    return false;
+
+  // If the structure has an unreasonable number of fields, leave it
+  // alone.
+  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+    // If this is a fixed size array, transform the Malloc to be an alloc of
+    // structs.  malloc [100 x struct],1 -> malloc struct, 100
+    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
+      Type *IntPtrTy = DL.getIntPtrType(CI->getType());
+      unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
+      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+      Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+                                                   AllocSize, NumElements,
+                                                   nullptr, CI->getName());
+      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+      CI->replaceAllUsesWith(Cast);
+      CI->eraseFromParent();
+      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
+        CI = cast<CallInst>(BCI->getOperand(0));
+      else
+        CI = cast<CallInst>(Malloc);
+    }
+
+    PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
+                         TLI);
+    return true;
+  }
+
+  return false;
+}
+
+// Try to optimize globals based on the knowledge that only one value (besides
+// its initializer) is ever stored to the global.
+static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+                                     AtomicOrdering Ordering,
+                                     const DataLayout &DL,
+                                     TargetLibraryInfo *TLI) {
+  // Ignore no-op GEPs and bitcasts.
+  StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+  // If we are dealing with a pointer global that is initialized to null and
+  // only has one (non-null) value stored into it, then we can optimize any
+  // users of the loaded value (often calls and loads) that would trap if the
+  // value was null.
+  if (GV->getInitializer()->getType()->isPointerTy() &&
+      GV->getInitializer()->isNullValue()) {
+    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+      if (GV->getInitializer()->getType() != SOVC->getType())
+        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+      // Optimize away any trapping uses of the loaded value.
+      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
+        return true;
+    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+      Type *MallocType = getMallocAllocatedType(CI, TLI);
+      if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+                                                           Ordering, DL, TLI))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+/// At this point, we have learned that the only two values ever stored into GV
+/// are its initializer and OtherVal.  See if we can shrink the global into a
+/// boolean and select between the two values whenever it is used.
This exposes +/// the values to other scalar optimizations. +static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { + Type *GVElType = GV->getType()->getElementType(); + + // If GVElType is already i1, it is already shrunk. If the type of the GV is + // an FP value, pointer or vector, don't do this optimization because a select + // between them is very expensive and unlikely to lead to later + // simplification. In these cases, we typically end up with "cond ? v1 : v2" + // where v1 and v2 both require constant pool loads, a big loss. + if (GVElType == Type::getInt1Ty(GV->getContext()) || + GVElType->isFloatingPointTy() || + GVElType->isPointerTy() || GVElType->isVectorTy()) + return false; + + // Walk the use list of the global seeing if all the uses are load or store. + // If there is anything else, bail out. + for (User *U : GV->users()) + if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) + return false; + + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); + + // Create the new global, initializing it to false. + GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), + false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), + GV->getName()+".b", + GV->getThreadLocalMode(), + GV->getType()->getAddressSpace()); + GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); + + Constant *InitVal = GV->getInitializer(); + assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && + "No reason to shrink to bool!"); + + // If initialized to zero and storing one into the global, we can use a cast + // instead of a select to synthesize the desired value. + bool IsOneZero = false; + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) + IsOneZero = InitVal->isNullValue() && CI->isOne(); + + while (!GV->use_empty()) { + Instruction *UI = cast<Instruction>(GV->user_back()); + if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { + // Change the store into a boolean store. + bool StoringOther = SI->getOperand(0) == OtherVal; + // Only do this if we weren't storing a loaded value. + Value *StoreVal; + if (StoringOther || SI->getOperand(0) == InitVal) { + StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), + StoringOther); + } else { + // Otherwise, we are storing a previously loaded copy. To do this, + // change the copy from copying the original value to just copying the + // bool. + Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); + + // If we've already replaced the input, StoredVal will be a cast or + // select instruction. If not, it will be a load of the original + // global. + if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + assert(LI->getOperand(0) == GV && "Not a copy!"); + // Insert a new load, to preserve the saved value. + StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0, + LI->getOrdering(), LI->getSynchScope(), LI); + } else { + assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && + "This is not a form that we understand!"); + StoreVal = StoredVal->getOperand(0); + assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); + } + } + new StoreInst(StoreVal, NewGV, false, 0, + SI->getOrdering(), SI->getSynchScope(), SI); + } else { + // Change the load into a load of bool then a select. 
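+      // (Sketch: '%v = load @G' becomes '%b = load @G.b' followed by
+      //  '%v = select i1 %b, OtherVal, InitVal', or a zext of %b when the
+      //  two stored values are exactly 0 and 1.)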
+ LoadInst *LI = cast<LoadInst>(UI); + LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0, + LI->getOrdering(), LI->getSynchScope(), LI); + Value *NSI; + if (IsOneZero) + NSI = new ZExtInst(NLI, LI->getType(), "", LI); + else + NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); + NSI->takeName(LI); + LI->replaceAllUsesWith(NSI); + } + UI->eraseFromParent(); + } + + // Retain the name of the old global variable. People who are debugging their + // programs may expect these variables to be named the same. + NewGV->takeName(GV); + GV->eraseFromParent(); + return true; +} + +bool GlobalOpt::deleteIfDead(GlobalValue &GV) { + GV.removeDeadConstantUsers(); + + if (!GV.isDiscardableIfUnused()) + return false; + + if (const Comdat *C = GV.getComdat()) + if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) + return false; + + bool Dead; + if (auto *F = dyn_cast<Function>(&GV)) + Dead = F->isDefTriviallyDead(); + else + Dead = GV.use_empty(); + if (!Dead) + return false; + + DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); + GV.eraseFromParent(); + ++NumDeleted; + return true; +} + +/// Analyze the specified global variable and optimize it if possible. If we +/// make a change, return true. +bool GlobalOpt::processGlobal(GlobalValue &GV) { + // Do more involved optimizations if the global is internal. + if (!GV.hasLocalLinkage()) + return false; + + GlobalStatus GS; + + if (GlobalStatus::analyzeGlobal(&GV, GS)) + return false; + + bool Changed = false; + if (!GS.IsCompared && !GV.hasUnnamedAddr()) { + GV.setUnnamedAddr(true); + NumUnnamed++; + Changed = true; + } + + auto *GVar = dyn_cast<GlobalVariable>(&GV); + if (!GVar) + return Changed; + + if (GVar->isConstant() || !GVar->hasInitializer()) + return Changed; + + return processInternalGlobal(GVar, GS) || Changed; +} + +bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) { + // Find all uses of GV. We expect them all to be in F, and if we can't + // identify any of the uses we bail out. + // + // On each of these uses, identify if the memory that GV points to is + // used/required/live at the start of the function. If it is not, for example + // if the first thing the function does is store to the GV, the GV can + // possibly be demoted. + // + // We don't do an exhaustive search for memory operations - simply look + // through bitcasts as they're quite common and benign. + const DataLayout &DL = GV->getParent()->getDataLayout(); + SmallVector<LoadInst *, 4> Loads; + SmallVector<StoreInst *, 4> Stores; + for (auto *U : GV->users()) { + if (Operator::getOpcode(U) == Instruction::BitCast) { + for (auto *UU : U->users()) { + if (auto *LI = dyn_cast<LoadInst>(UU)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast<StoreInst>(UU)) + Stores.push_back(SI); + else + return false; + } + continue; + } + + Instruction *I = dyn_cast<Instruction>(U); + if (!I) + return false; + assert(I->getParent()->getParent() == F); + + if (auto *LI = dyn_cast<LoadInst>(I)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast<StoreInst>(I)) + Stores.push_back(SI); + else + return false; + } + + // We have identified all uses of GV into loads and stores. Now check if all + // of them are known not to depend on the value of the global at the function + // entry point. We do this by ensuring that every load is dominated by at + // least one store. + auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F)) + .getDomTree(); + + // The below check is quadratic. 
Check we're not going to do too many tests. + // FIXME: Even though this will always have worst-case quadratic time, we + // could put effort into minimizing the average time by putting stores that + // have been shown to dominate at least one load at the beginning of the + // Stores array, making subsequent dominance checks more likely to succeed + // early. + // + // The threshold here is fairly large because global->local demotion is a + // very powerful optimization should it fire. + const unsigned Threshold = 100; + if (Loads.size() * Stores.size() > Threshold) + return false; + + for (auto *L : Loads) { + auto *LTy = L->getType(); + if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) { + auto *STy = S->getValueOperand()->getType(); + // The load is only dominated by the store if DomTree says so + // and the number of bits loaded in L is less than or equal to + // the number of bits stored in S. + return DT.dominates(S, L) && + DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy); + })) + return false; + } + // All loads have known dependences inside F, so the global can be localized. + return true; +} + +/// C may have non-instruction users. Can all of those users be turned into +/// instructions? +static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { + // We don't do this exhaustively. The most common pattern that we really need + // to care about is a constant GEP or constant bitcast - so just looking + // through one single ConstantExpr. + // + // The set of constants that this function returns true for must be able to be + // handled by makeAllConstantUsesInstructions. + for (auto *U : C->users()) { + if (isa<Instruction>(U)) + continue; + if (!isa<ConstantExpr>(U)) + // Non instruction, non-constantexpr user; cannot convert this. + return false; + for (auto *UU : U->users()) + if (!isa<Instruction>(UU)) + // A constantexpr used by another constant. We don't try and recurse any + // further but just bail out at this point. + return false; + } + + return true; +} + +/// C may have non-instruction users, and +/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the +/// non-instruction users to instructions. +static void makeAllConstantUsesInstructions(Constant *C) { + SmallVector<ConstantExpr*,4> Users; + for (auto *U : C->users()) { + if (isa<ConstantExpr>(U)) + Users.push_back(cast<ConstantExpr>(U)); + else + // We should never get here; allNonInstructionUsersCanBeMadeInstructions + // should not have returned true for C. + assert( + isa<Instruction>(U) && + "Can't transform non-constantexpr non-instruction to instruction!"); + } + + SmallVector<Value*,4> UUsers; + for (auto *U : Users) { + UUsers.clear(); + for (auto *UU : U->users()) + UUsers.push_back(UU); + for (auto *UU : UUsers) { + Instruction *UI = cast<Instruction>(UU); + Instruction *NewU = U->getAsInstruction(); + NewU->insertBefore(UI); + UI->replaceUsesOfWith(U, NewU); + } + U->dropAllReferences(); + } +} + +/// Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. +bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, + const GlobalStatus &GS) { + auto &DL = GV->getParent()->getDataLayout(); + // If this is a first class global and has only one accessing function and + // this function is non-recursive, we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non-single-value types since we + // are just replacing static memory to stack memory. 
+ // + // If the global is in different address space, don't bring it to stack. + if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && + GV->getType()->getElementType()->isSingleValueType() && + GV->getType()->getAddressSpace() == 0 && + !GV->isExternallyInitialized() && + allNonInstructionUsersCanBeMadeInstructions(GV) && + GS.AccessingFunction->doesNotRecurse() && + isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); + Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction + ->getEntryBlock().begin()); + Type *ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr, + GV->getName(), &FirstI); + if (!isa<UndefValue>(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + makeAllConstantUsesInstructions(GV); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.IsLoaded) { + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); + + bool Changed; + if (isLeakCheckerRoot(GV)) { + // Delete any constant stores to the global. + Changed = CleanupPointerRootUsers(GV, TLI); + } else { + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + } + + // If the global is dead now, delete it. + if (GV->use_empty()) { + GV->eraseFromParent(); + ++NumDeleted; + Changed = true; + } + return Changed; + + } else if (GS.StoredType <= GlobalStatus::InitializerStored) { + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); + GV->setConstant(true); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + + // If the global is dead now, just nuke it. + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + const DataLayout &DL = GV->getParent()->getDataLayout(); + if (SRAGlobal(GV, DL)) + return true; + } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be the stored value, then delete all stores to the + // global. This allows us to mark it constant. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (isa<UndefValue>(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + ++NumSubstitute; + return true; + } + + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. 
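+    // As one illustrative case: if the only value ever stored to the global
+    // is the address of another global, loads of it can never yield null, so
+    // null checks guarding those loaded pointers can be folded away (a
+    // sketch of what the helper below may do, not an exhaustive list).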
+ if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)) + return true; + + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) { + if (GS.Ordering == NotAtomic) { + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; + return true; + } + } + } + } + + return false; +} + +/// Walk all of the direct calls of the specified function, changing them to +/// FastCC. +static void ChangeCalleesToFastCall(Function *F) { + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) + continue; + CallSite CS(cast<Instruction>(U)); + CS.setCallingConv(CallingConv::Fast); + } +} + +static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) { + for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { + unsigned Index = Attrs.getSlotIndex(i); + if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest)) + continue; + + // There can be only one. + return Attrs.removeAttribute(C, Index, Attribute::Nest); + } + + return Attrs; +} + +static void RemoveNestAttribute(Function *F) { + F->setAttributes(StripNest(F->getContext(), F->getAttributes())); + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) + continue; + CallSite CS(cast<Instruction>(U)); + CS.setAttributes(StripNest(F->getContext(), CS.getAttributes())); + } +} + +/// Return true if this is a calling convention that we'd like to change. The +/// idea here is that we don't want to mess with the convention if the user +/// explicitly requested something with performance implications like coldcc, +/// GHC, or anyregcc. +static bool isProfitableToMakeFastCC(Function *F) { + CallingConv::ID CC = F->getCallingConv(); + // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? + return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; +} + +bool GlobalOpt::OptimizeFunctions(Module &M) { + bool Changed = false; + // Optimize functions. + for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { + Function *F = &*FI++; + // Functions without names cannot be referenced outside this module. + if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) + F->setLinkage(GlobalValue::InternalLinkage); + + if (deleteIfDead(*F)) { + Changed = true; + continue; + } + + Changed |= processGlobal(*F); + + if (!F->hasLocalLinkage()) + continue; + if (isProfitableToMakeFastCC(F) && !F->isVarArg() && + !F->hasAddressTaken()) { + // If this function has a calling convention worth changing, is not a + // varargs function, and is only called directly, promote it to use the + // Fast calling convention. + F->setCallingConv(CallingConv::Fast); + ChangeCalleesToFastCall(F); + ++NumFastCallFns; + Changed = true; + } + + if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && + !F->hasAddressTaken()) { + // The function is not used by a trampoline intrinsic, so it is safe + // to remove the 'nest' attribute. + RemoveNestAttribute(F); + ++NumNestRemoved; + Changed = true; + } + } + return Changed; +} + +bool GlobalOpt::OptimizeGlobalVars(Module &M) { + bool Changed = false; + + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = &*GVI++; + // Global variables without names cannot be referenced outside this module. + if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) + GV->setLinkage(GlobalValue::InternalLinkage); + // Simplify the initializer. 
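+    // For example (illustrative), a constant-expression initializer such as
+    //   i32 add (i32 2, i32 3)
+    // folds down to the plain constant i32 5.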
+ if (GV->hasInitializer()) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { + auto &DL = M.getDataLayout(); + Constant *New = ConstantFoldConstantExpression(CE, DL, TLI); + if (New && New != CE) + GV->setInitializer(New); + } + + if (deleteIfDead(*GV)) { + Changed = true; + continue; + } + + Changed |= processGlobal(*GV); + } + return Changed; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL); + +/// Return true if the specified constant can be handled by the code generator. +/// We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool +isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // Simple global addresses are supported, do not allow dllimport or + // thread-local globals. + if (auto *GV = dyn_cast<GlobalValue>(C)) + return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); + + // Simple integer, undef, constant aggregate zero, etc are all supported. + if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || + isa<ConstantVector>(C)) { + for (Value *Op : C->operands()) + if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL)) + return false; + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast<ConstantExpr>(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + // Bitcast is fine if the casted value is fine. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // int <=> ptr is fine if the int type is the same size as the + // pointer type. + if (DL.getTypeSizeInBits(CE->getType()) != + DL.getTypeSizeInBits(CE->getOperand(0)->getType())) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + // GEP is fine if it is simple + constant offset. + case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::Add: + // We allow simple+cst. + if (!isa<ConstantInt>(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C).second) + return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); +} + + +/// Return true if this constant is simple enough for us to understand. In +/// particular, if it is a cast to anything other than from one pointer type to +/// another pointer type, we punt. 
We basically just support direct accesses to +/// globals and GEP's of globals. This should be kept up to date with +/// CommitValueTo. +static bool isSimpleEnoughPointerToCommit(Constant *C) { + // Conservatively, avoid aggregate types. This is because we don't + // want to worry about them partially overlapping other stores. + if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) + return false; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + // Do not allow weak/*_odr/linkonce linkage or external globals. + return GV->hasUniqueInitializer(); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + // Handle a constantexpr gep. + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0)) && + cast<GEPOperator>(CE)->isInBounds()) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + if (!GV->hasUniqueInitializer()) + return false; + + // The first index must be zero. + ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin())); + if (!CI || !CI->isZero()) return false; + + // The remaining indices must be compile-time known integers within the + // notional bounds of the corresponding static array types. + if (!CE->isGEPWithNoNotionalOverIndexing()) + return false; + + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa<GlobalVariable>(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); + } + } + + return false; +} + +/// Evaluate a piece of a constantexpr store into a global initializer. This +/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the +/// GEP operands of Addr [0, OpNo) have been stepped into. +static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, + ConstantExpr *Addr, unsigned OpNo) { + // Base case of the recursion. + if (OpNo == Addr->getNumOperands()) { + assert(Val->getType() == Init->getType() && "Type mismatch!"); + return Val; + } + + SmallVector<Constant*, 32> Elts; + if (StructType *STy = dyn_cast<StructType>(Init->getType())) { + // Break up the constant into its elements. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); + + // Replace the element that we are supposed to. + ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); + unsigned Idx = CU->getZExtValue(); + assert(Idx < STy->getNumElements() && "Struct index out of range!"); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); + + // Return the modified struct. + return ConstantStruct::get(STy, Elts); + } + + ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); + SequentialType *InitTy = cast<SequentialType>(Init->getType()); + + uint64_t NumElts; + if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = InitTy->getVectorNumElements(); + + // Break up the array into elements. 
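+  // Illustrative sketch: storing i32 5 at index 2 into an initializer of
+  //   [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+  // rebuilds it as
+  //   [4 x i32] [i32 0, i32 1, i32 5, i32 3]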
+ for (uint64_t i = 0, e = NumElts; i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); + + assert(CI->getZExtValue() < NumElts); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + + if (Init->getType()->isArrayTy()) + return ConstantArray::get(cast<ArrayType>(InitTy), Elts); + return ConstantVector::get(Elts); +} + +/// We have decided that Addr (which satisfies the predicate +/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. +static void CommitValueTo(Constant *Val, Constant *Addr) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { + assert(GV->hasInitializer()); + GV->setInitializer(Val); + return; + } + + ConstantExpr *CE = cast<ConstantExpr>(Addr); + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); +} + +namespace { + +/// This class evaluates LLVM IR, producing the Constant representing each SSA +/// instruction. Changes to global variables are stored in a mapping that can +/// be iterated over after the evaluation is complete. Once an evaluation call +/// fails, the evaluation object should not be reused. +class Evaluator { +public: + Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) + : DL(DL), TLI(TLI) { + ValueStack.emplace_back(); + } + + ~Evaluator() { + for (auto &Tmp : AllocaTmps) + // If there are still users of the alloca, the program is doing something + // silly, e.g. storing the address of the alloca somewhere and using it + // later. Since this is undefined, we'll just make it be null. + if (!Tmp->use_empty()) + Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); + } + + /// Evaluate a call to function F, returning true if successful, false if we + /// can't evaluate it. ActualArgs contains the formal arguments for the + /// function. + bool EvaluateFunction(Function *F, Constant *&RetVal, + const SmallVectorImpl<Constant*> &ActualArgs); + + /// Evaluate all instructions in block BB, returning true if successful, false + /// if we can't evaluate it. NewBB returns the next BB that control flows + /// into, or null upon return. + bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB); + + Constant *getVal(Value *V) { + if (Constant *CV = dyn_cast<Constant>(V)) return CV; + Constant *R = ValueStack.back().lookup(V); + assert(R && "Reference to an uncomputed value!"); + return R; + } + + void setVal(Value *V, Constant *C) { + ValueStack.back()[V] = C; + } + + const DenseMap<Constant*, Constant*> &getMutatedMemory() const { + return MutatedMemory; + } + + const SmallPtrSetImpl<GlobalVariable*> &getInvariants() const { + return Invariants; + } + +private: + Constant *ComputeLoadResult(Constant *P); + + /// As we compute SSA register values, we store their contents here. The back + /// of the deque contains the current function and the stack contains the + /// values in the calling frames. + std::deque<DenseMap<Value*, Constant*>> ValueStack; + + /// This is used to detect recursion. In pathological situations we could hit + /// exponential behavior, but at least there is nothing unbounded. + SmallVector<Function*, 4> CallStack; + + /// For each store we execute, we update this map. Loads check this to get + /// the most up-to-date value. If evaluation is successful, this state is + /// committed to the process. + DenseMap<Constant*, Constant*> MutatedMemory; + + /// To 'execute' an alloca, we create a temporary global variable to represent + /// its body. 
This vector is needed so we can delete the temporary globals + /// when we are done. + SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps; + + /// These global variables have been marked invariant by the static + /// constructor. + SmallPtrSet<GlobalVariable*, 8> Invariants; + + /// These are constants we have checked and know to be simple enough to live + /// in a static initializer of a global. + SmallPtrSet<Constant*, 8> SimpleConstants; + + const DataLayout &DL; + const TargetLibraryInfo *TLI; +}; + +} // anonymous namespace + +/// Return the value that would be computed by a load from P after the stores +/// reflected by 'memory' have been performed. If we can't decide, return null. +Constant *Evaluator::ComputeLoadResult(Constant *P) { + // If this memory location has been recently stored, use the stored value: it + // is the most up-to-date. + DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P); + if (I != MutatedMemory.end()) return I->second; + + // Access it. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { + if (GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + return nullptr; + } + + // Handle a constantexpr getelementptr. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0))) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + if (GV->hasDefinitiveInitializer()) + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + + return nullptr; // don't know how to evaluate. +} + +/// Evaluate all instructions in block BB, returning true if successful, false +/// if we can't evaluate it. NewBB returns the next BB that control flows into, +/// or null upon return. +bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, + BasicBlock *&NextBB) { + // This is the main evaluation loop. + while (1) { + Constant *InstResult = nullptr; + + DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); + + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { + if (!SI->isSimple()) { + DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + Constant *Ptr = getVal(SI->getOperand(1)); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + DEBUG(dbgs() << "; To: " << *Ptr << "\n"); + } + if (!isSimpleEnoughPointerToCommit(Ptr)) { + // If this is too complex for us to commit, reject it. + DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); + return false; + } + + Constant *Val = getVal(SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { + DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val + << "\n"); + return false; + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + if (CE->getOpcode() == Instruction::BitCast) { + DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. 
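+          // Illustrative sketch (constants hypothetical):
+          //   store i64 1, i64* bitcast (double* @g to i64*)
+          // is handled as if it were a store of
+          //   double bitcast (i64 1 to double)
+          // directly into @g.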
+ Ptr = CE->getOperand(0); + + Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. + while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (StructType *STy = dyn_cast<StructType>(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. + } else { + DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " + "evaluate.\n"); + return false; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + + DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); + } + } + + MutatedMemory[Ptr] = Val; + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { + InstResult = ConstantExpr::get(BO->getOpcode(), + getVal(BO->getOperand(0)), + getVal(BO->getOperand(1))); + DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult + << "\n"); + } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { + InstResult = ConstantExpr::getCompare(CI->getPredicate(), + getVal(CI->getOperand(0)), + getVal(CI->getOperand(1))); + DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult + << "\n"); + } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { + InstResult = ConstantExpr::getCast(CI->getOpcode(), + getVal(CI->getOperand(0)), + CI->getType()); + DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult + << "\n"); + } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { + InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), + getVal(SI->getOperand(1)), + getVal(SI->getOperand(2))); + DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult + << "\n"); + } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { + InstResult = ConstantExpr::getExtractValue( + getVal(EVI->getAggregateOperand()), EVI->getIndices()); + DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { + InstResult = ConstantExpr::getInsertValue( + getVal(IVI->getAggregateOperand()), + getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); + DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { + Constant *P = getVal(GEP->getOperand(0)); + SmallVector<Constant*, 8> GEPOps; + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); + i != e; ++i) + GEPOps.push_back(getVal(*i)); + InstResult = + ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); + DEBUG(dbgs() << "Found a GEP! 
Simplifying: " << *InstResult + << "\n"); + } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { + + if (!LI->isSimple()) { + DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + + Constant *Ptr = getVal(LI->getOperand(0)); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + DEBUG(dbgs() << "Found a constant pointer expression, constant " + "folding: " << *Ptr << "\n"); + } + InstResult = ComputeLoadResult(Ptr); + if (!InstResult) { + DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." + "\n"); + return false; // Could not evaluate load. + } + + DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { + if (AI->isArrayAllocation()) { + DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); + return false; // Cannot handle array allocs. + } + Type *Ty = AI->getType()->getElementType(); + AllocaTmps.push_back( + make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, + UndefValue::get(Ty), AI->getName())); + InstResult = AllocaTmps.back().get(); + DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); + } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { + CallSite CS(&*CurInst); + + // Debug info can safely be ignored here. + if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { + DEBUG(dbgs() << "Ignoring debug info.\n"); + ++CurInst; + continue; + } + + // Cannot handle inline asm. + if (isa<InlineAsm>(CS.getCalledValue())) { + DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); + return false; + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { + if (MSI->isVolatile()) { + DEBUG(dbgs() << "Can not optimize a volatile memset " << + "intrinsic.\n"); + return false; + } + Constant *Ptr = getVal(MSI->getDest()); + Constant *Val = getVal(MSI->getValue()); + Constant *DestVal = ComputeLoadResult(getVal(Ptr)); + if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { + // This memset is a no-op. + DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; + } + } + + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); + ++CurInst; + continue; + } + + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + // We don't insert an entry into Values, as it doesn't have a + // meaningful return value. + if (!II->use_empty()) { + DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); + return false; + } + ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); + Value *PtrArg = getVal(II->getArgOperand(1)); + Value *Ptr = PtrArg->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { + Type *ElemTy = cast<PointerType>(GV->getType())->getElementType(); + if (!Size->isAllOnesValue() && + Size->getValue().getLimitedValue() >= + DL.getTypeStoreSize(ElemTy)) { + Invariants.insert(GV); + DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV + << "\n"); + } else { + DEBUG(dbgs() << "Found a global var, but can not treat it as an " + "invariant.\n"); + } + } + // Continue even if we do nothing. 
+ ++CurInst; + continue; + } else if (II->getIntrinsicID() == Intrinsic::assume) { + DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + ++CurInst; + continue; + } + + DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); + return false; + } + + // Resolve function pointers. + Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); + if (!Callee || Callee->mayBeOverridden()) { + DEBUG(dbgs() << "Can not resolve function pointer.\n"); + return false; // Cannot resolve. + } + + SmallVector<Constant*, 8> Formals; + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) + Formals.push_back(getVal(*i)); + + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { + InstResult = C; + DEBUG(dbgs() << "Constant folded function call. Result: " << + *InstResult << "\n"); + } else { + DEBUG(dbgs() << "Can not constant fold function call.\n"); + return false; + } + } else { + if (Callee->getFunctionType()->isVarArg()) { + DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); + return false; + } + + Constant *RetVal = nullptr; + // Execute the call, if successful, use the return value. + ValueStack.emplace_back(); + if (!EvaluateFunction(Callee, RetVal, Formals)) { + DEBUG(dbgs() << "Failed to evaluate function.\n"); + return false; + } + ValueStack.pop_back(); + InstResult = RetVal; + + if (InstResult) { + DEBUG(dbgs() << "Successfully evaluated function. Result: " << + InstResult << "\n\n"); + } else { + DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); + } + } + } else if (isa<TerminatorInst>(CurInst)) { + DEBUG(dbgs() << "Found a terminator instruction.\n"); + + if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { + if (BI->isUnconditional()) { + NextBB = BI->getSuccessor(0); + } else { + ConstantInt *Cond = + dyn_cast<ConstantInt>(getVal(BI->getCondition())); + if (!Cond) return false; // Cannot determine. + + NextBB = BI->getSuccessor(!Cond->getZExtValue()); + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { + ConstantInt *Val = + dyn_cast<ConstantInt>(getVal(SI->getCondition())); + if (!Val) return false; // Cannot determine. + NextBB = SI->findCaseValue(Val).getCaseSuccessor(); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { + Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); + if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) + NextBB = BA->getBasicBlock(); + else + return false; // Cannot determine. + } else if (isa<ReturnInst>(CurInst)) { + NextBB = nullptr; + } else { + // invoke, unwind, resume, unreachable. + DEBUG(dbgs() << "Can not handle terminator."); + return false; // Cannot handle this terminator. + } + + // We succeeded at evaluating this block! + DEBUG(dbgs() << "Successfully evaluated block.\n"); + return true; + } else { + // Did not know how to evaluate this! + DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." + "\n"); + return false; + } + + if (!CurInst->use_empty()) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) + InstResult = ConstantFoldConstantExpression(CE, DL, TLI); + + setVal(&*CurInst, InstResult); + } + + // If we just processed an invoke, we finished evaluating the block. + if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { + NextBB = II->getNormalDest(); + DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); + return true; + } + + // Advance program counter. 
+    ++CurInst;
+  }
+}
+
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the formal arguments for the
+/// function.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+                                 const SmallVectorImpl<Constant*> &ActualArgs) {
+  // Check to see if this function is already executing (recursion). If so,
+  // bail out. TODO: we might want to accept limited recursion.
+  if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+    return false;
+
+  CallStack.push_back(F);
+
+  // Initialize arguments to the incoming values specified.
+  unsigned ArgNo = 0;
+  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+       ++AI, ++ArgNo)
+    setVal(&*AI, ActualArgs[ArgNo]);
+
+  // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+  // we can only evaluate any one basic block at most once. This set keeps
+  // track of what we have executed so we can detect recursive cases etc.
+  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+  // CurBB - The current basic block we're evaluating.
+  BasicBlock *CurBB = &F->front();
+
+  BasicBlock::iterator CurInst = CurBB->begin();
+
+  while (1) {
+    BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
+    DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
+    if (!EvaluateBlock(CurInst, NextBB))
+      return false;
+
+    if (!NextBB) {
+      // Successfully running until there's no next block means that we found
+      // the return. Fill in the return value and pop the call stack.
+      ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+      if (RI->getNumOperands())
+        RetVal = getVal(RI->getOperand(0));
+      CallStack.pop_back();
+      return true;
+    }
+
+    // Okay, we succeeded in evaluating this control flow. See if we have
+    // executed the new block before. If so, we have a looping function,
+    // which we cannot evaluate in reasonable time.
+    if (!ExecutedBlocks.insert(NextBB).second)
+      return false; // looped!
+
+    // Okay, we have never been in this block before. Check to see if there
+    // are any PHI nodes. If so, evaluate them with information about where
+    // we came from.
+    PHINode *PN = nullptr;
+    for (CurInst = NextBB->begin();
+         (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+      setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+    // Advance to the next block.
+    CurBB = NextBB;
+  }
+}
+
+/// Evaluate static constructors in the function, if we can. Return true if we
+/// can, false otherwise.
+static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
+                                      const TargetLibraryInfo *TLI) {
+  // Call the function.
+  Evaluator Eval(DL, TLI);
+  Constant *RetValDummy;
+  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
+                                           SmallVector<Constant*, 0>());
+
+  if (EvalSuccess) {
+    ++NumCtorsEvaluated;
+
+    // We succeeded at evaluation: commit the result. 
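+    // Illustrative example: for file-scope C++ such as
+    //   int g = compute();   // compute() fully evaluable at compile time
+    // the mutated-memory map ends up holding @g -> <computed constant>, and
+    // committing it rewrites @g's initializer so the ctor body becomes
+    // unnecessary.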
+ DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + << F->getName() << "' to " << Eval.getMutatedMemory().size() + << " stores.\n"); + for (DenseMap<Constant*, Constant*>::const_iterator I = + Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end(); + I != E; ++I) + CommitValueTo(I->second, I->first); + for (GlobalVariable *GV : Eval.getInvariants()) + GV->setConstant(true); + } + + return EvalSuccess; +} + +static int compareNames(Constant *const *A, Constant *const *B) { + return (*A)->stripPointerCasts()->getName().compare( + (*B)->stripPointerCasts()->getName()); +} + +static void setUsedInitializer(GlobalVariable &V, + const SmallPtrSet<GlobalValue *, 8> &Init) { + if (Init.empty()) { + V.eraseFromParent(); + return; + } + + // Type of pointer to the array of pointers. + PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); + + SmallVector<llvm::Constant *, 8> UsedArray; + for (GlobalValue *GV : Init) { + Constant *Cast + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy); + UsedArray.push_back(Cast); + } + // Sort to get deterministic order. + array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames); + ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size()); + + Module *M = V.getParent(); + V.removeFromParent(); + GlobalVariable *NV = + new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), ""); + NV->takeName(&V); + NV->setSection("llvm.metadata"); + delete &V; +} + +namespace { +/// An easy to access representation of llvm.used and llvm.compiler.used. +class LLVMUsed { + SmallPtrSet<GlobalValue *, 8> Used; + SmallPtrSet<GlobalValue *, 8> CompilerUsed; + GlobalVariable *UsedV; + GlobalVariable *CompilerUsedV; + +public: + LLVMUsed(Module &M) { + UsedV = collectUsedGlobalVariables(M, Used, false); + CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true); + } + typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator; + typedef iterator_range<iterator> used_iterator_range; + iterator usedBegin() { return Used.begin(); } + iterator usedEnd() { return Used.end(); } + used_iterator_range used() { + return used_iterator_range(usedBegin(), usedEnd()); + } + iterator compilerUsedBegin() { return CompilerUsed.begin(); } + iterator compilerUsedEnd() { return CompilerUsed.end(); } + used_iterator_range compilerUsed() { + return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); + } + bool usedCount(GlobalValue *GV) const { return Used.count(GV); } + bool compilerUsedCount(GlobalValue *GV) const { + return CompilerUsed.count(GV); + } + bool usedErase(GlobalValue *GV) { return Used.erase(GV); } + bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } + bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } + bool compilerUsedInsert(GlobalValue *GV) { + return CompilerUsed.insert(GV).second; + } + + void syncVariablesAndSets() { + if (UsedV) + setUsedInitializer(*UsedV, Used); + if (CompilerUsedV) + setUsedInitializer(*CompilerUsedV, CompilerUsed); + } +}; +} + +static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { + if (GA.use_empty()) // No use at all. + return false; + + assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) && + "We should have removed the duplicated " + "element from llvm.compiler.used"); + if (!GA.hasOneUse()) + // Strictly more than one use. So at least one is not in llvm.used and + // llvm.compiler.used. + return true; + + // Exactly one use. 
Check if it is in llvm.used or llvm.compiler.used. + return !U.usedCount(&GA) && !U.compilerUsedCount(&GA); +} + +static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V, + const LLVMUsed &U) { + unsigned N = 2; + assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) && + "We should have removed the duplicated " + "element from llvm.compiler.used"); + if (U.usedCount(&V) || U.compilerUsedCount(&V)) + ++N; + return V.hasNUsesOrMore(N); +} + +static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) { + if (!GA.hasLocalLinkage()) + return true; + + return U.usedCount(&GA) || U.compilerUsedCount(&GA); +} + +static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, + bool &RenameTarget) { + RenameTarget = false; + bool Ret = false; + if (hasUseOtherThanLLVMUsed(GA, U)) + Ret = true; + + // If the alias is externally visible, we may still be able to simplify it. + if (!mayHaveOtherReferences(GA, U)) + return Ret; + + // If the aliasee has internal linkage, give it the name and linkage + // of the alias, and delete the alias. This turns: + // define internal ... @f(...) + // @a = alias ... @f + // into: + // define ... @a(...) + Constant *Aliasee = GA.getAliasee(); + GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); + if (!Target->hasLocalLinkage()) + return Ret; + + // Do not perform the transform if multiple aliases potentially target the + // aliasee. This check also ensures that it is safe to replace the section + // and other attributes of the aliasee with those of the alias. + if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U)) + return Ret; + + RenameTarget = true; + return true; +} + +bool GlobalOpt::OptimizeGlobalAliases(Module &M) { + bool Changed = false; + LLVMUsed Used(M); + + for (GlobalValue *GV : Used.used()) + Used.compilerUsedErase(GV); + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E;) { + GlobalAlias *J = &*I++; + + // Aliases without names cannot be referenced outside this module. + if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) + J->setLinkage(GlobalValue::InternalLinkage); + + if (deleteIfDead(*J)) { + Changed = true; + continue; + } + + // If the aliasee may change at link time, nothing can be done - bail out. + if (J->mayBeOverridden()) + continue; + + Constant *Aliasee = J->getAliasee(); + GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); + // We can't trivially replace the alias with the aliasee if the aliasee is + // non-trivial in some way. + // TODO: Try to handle non-zero GEPs of local aliasees. + if (!Target) + continue; + Target->removeDeadConstantUsers(); + + // Make all users of the alias use the aliasee instead. + bool RenameTarget; + if (!hasUsesToReplace(*J, Used, RenameTarget)) + continue; + + J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType())); + ++NumAliasesResolved; + Changed = true; + + if (RenameTarget) { + // Give the aliasee the name, linkage and other attributes of the alias. + Target->takeName(&*J); + Target->setLinkage(J->getLinkage()); + Target->setVisibility(J->getVisibility()); + Target->setDLLStorageClass(J->getDLLStorageClass()); + + if (Used.usedErase(&*J)) + Used.usedInsert(Target); + + if (Used.compilerUsedErase(&*J)) + Used.compilerUsedInsert(Target); + } else if (mayHaveOtherReferences(*J, Used)) + continue; + + // Delete the alias. 
+ M.getAliasList().erase(J); + ++NumAliasesRemoved; + Changed = true; + } + + Used.syncVariablesAndSets(); + + return Changed; +} + +static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::cxa_atexit)) + return nullptr; + + Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit)); + + if (!Fn) + return nullptr; + + FunctionType *FTy = Fn->getFunctionType(); + + // Checking that the function has the right return type, the right number of + // parameters and that they all have pointer types should be enough. + if (!FTy->getReturnType()->isIntegerTy() || + FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return nullptr; + + return Fn; +} + +/// Returns whether the given function is an empty C++ destructor and can +/// therefore be eliminated. +/// Note that we assume that other optimization passes have already simplified +/// the code so we only look for a function with a single basic block, where +/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and +/// other side-effect free instructions. +static bool cxxDtorIsEmpty(const Function &Fn, + SmallPtrSet<const Function *, 8> &CalledFunctions) { + // FIXME: We could eliminate C++ destructors if they're readonly/readnone and + // nounwind, but that doesn't seem worth doing. + if (Fn.isDeclaration()) + return false; + + if (++Fn.begin() != Fn.end()) + return false; + + const BasicBlock &EntryBlock = Fn.getEntryBlock(); + for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); + I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore debug intrinsics. + if (isa<DbgInfoIntrinsic>(CI)) + continue; + + const Function *CalledFn = CI->getCalledFunction(); + + if (!CalledFn) + return false; + + SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions); + + // Don't treat recursive functions as empty. + if (!NewCalledFunctions.insert(CalledFn).second) + return false; + + if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) + return false; + } else if (isa<ReturnInst>(*I)) + return true; // We're done. + else if (I->mayHaveSideEffects()) + return false; // Destructor with side effects, bail. + } + + return false; +} + +bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { + /// Itanium C++ ABI p3.3.5: + /// + /// After constructing a global (or local static) object, that will require + /// destruction on exit, a termination function is registered as follows: + /// + /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); + /// + /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the + /// call f(p) when DSO d is unloaded, before all such termination calls + /// registered before this one. It returns zero if registration is + /// successful, nonzero on failure. + + // This pass will look for calls to __cxa_atexit where the function is trivial + // and remove them. + bool Changed = false; + + for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end(); + I != E;) { + // We're only interested in calls. Theoretically, we could handle invoke + // instructions as well, but neither llvm-gcc nor clang generate invokes + // to __cxa_atexit. 
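+    // Illustrative sketch (mangled names hypothetical):
+    //   call i32 @__cxa_atexit(void (i8*)* @_ZN1AD2Ev, i8* @a, i8* @__dso_handle)
+    // can be deleted outright once ~A() is known to be empty.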
+ CallInst *CI = dyn_cast<CallInst>(*I++); + if (!CI) + continue; + + Function *DtorFn = + dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); + if (!DtorFn) + continue; + + SmallPtrSet<const Function *, 8> CalledFunctions; + if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) + continue; + + // Just remove the call. + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + CI->eraseFromParent(); + + ++NumCXXDtorsRemoved; + + Changed |= true; + } + + return Changed; +} + +bool GlobalOpt::runOnModule(Module &M) { + bool Changed = false; + + auto &DL = M.getDataLayout(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + + bool LocalChange = true; + while (LocalChange) { + LocalChange = false; + + NotDiscardableComdats.clear(); + for (const GlobalVariable &GV : M.globals()) + if (const Comdat *C = GV.getComdat()) + if (!GV.isDiscardableIfUnused() || !GV.use_empty()) + NotDiscardableComdats.insert(C); + for (Function &F : M) + if (const Comdat *C = F.getComdat()) + if (!F.isDefTriviallyDead()) + NotDiscardableComdats.insert(C); + for (GlobalAlias &GA : M.aliases()) + if (const Comdat *C = GA.getComdat()) + if (!GA.isDiscardableIfUnused() || !GA.use_empty()) + NotDiscardableComdats.insert(C); + + // Delete functions that are trivially dead, ccc -> fastcc + LocalChange |= OptimizeFunctions(M); + + // Optimize global_ctors list. + LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { + return EvaluateStaticConstructor(F, DL, TLI); + }); + + // Optimize non-address-taken globals. + LocalChange |= OptimizeGlobalVars(M); + + // Resolve aliases, when possible. + LocalChange |= OptimizeGlobalAliases(M); + + // Try to remove trivial global destructors if they are not removed + // already. + Function *CXAAtExitFn = FindCXAAtExit(M, TLI); + if (CXAAtExitFn) + LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); + + Changed |= LocalChange; + } + + // TODO: Move all global ctors functions to the end of the module for code + // layout. + + return Changed; +} + diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp new file mode 100644 index 0000000..af541d1 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -0,0 +1,279 @@ +//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements an _extremely_ simple interprocedural constant +// propagation pass. It could certainly be improved in many different ways, +// like using a worklist. This pass makes arguments dead, but does not remove +// them. The existing dead argument elimination pass should be run after this +// to clean up the mess. 
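+//
+// An illustrative example (hypothetical IR): given
+//   define internal i32 @f(i32 %x) { ... }
+// where every call site passes the constant 42, uses of %x inside @f are
+// replaced by 42. Likewise, if @f provably always returns the same constant
+// (or one of its own arguments), each caller is rewritten to use that value
+// directly instead of the call's result.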
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "ipconstprop" + +STATISTIC(NumArgumentsProped, "Number of args turned into constants"); +STATISTIC(NumReturnValProped, "Number of return values turned into constants"); + +namespace { + /// IPCP - The interprocedural constant propagation pass + /// + struct IPCP : public ModulePass { + static char ID; // Pass identification, replacement for typeid + IPCP() : ModulePass(ID) { + initializeIPCPPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + private: + bool PropagateConstantsIntoArguments(Function &F); + bool PropagateConstantReturn(Function &F); + }; +} + +char IPCP::ID = 0; +INITIALIZE_PASS(IPCP, "ipconstprop", + "Interprocedural constant propagation", false, false) + +ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } + +bool IPCP::runOnModule(Module &M) { + bool Changed = false; + bool LocalChange = true; + + // FIXME: instead of using smart algorithms, we just iterate until we stop + // making changes. + while (LocalChange) { + LocalChange = false; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) { + // Delete any klingons. + I->removeDeadConstantUsers(); + if (I->hasLocalLinkage()) + LocalChange |= PropagateConstantsIntoArguments(*I); + Changed |= PropagateConstantReturn(*I); + } + Changed |= LocalChange; + } + return Changed; +} + +/// PropagateConstantsIntoArguments - Look at all uses of the specified +/// function. If all uses are direct call sites, and all pass a particular +/// constant in for an argument, propagate that constant in as the argument. +/// +bool IPCP::PropagateConstantsIntoArguments(Function &F) { + if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit. + + // For each argument, keep track of its constant value and whether it is a + // constant or not. The bool is driven to true when found to be non-constant. + SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants; + ArgumentConstants.resize(F.arg_size()); + + unsigned NumNonconstant = 0; + for (Use &U : F.uses()) { + User *UR = U.getUser(); + // Ignore blockaddress uses. + if (isa<BlockAddress>(UR)) continue; + + // Used by a non-instruction, or not the callee of a function, do not + // transform. + if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR)) + return false; + + CallSite CS(cast<Instruction>(UR)); + if (!CS.isCallee(&U)) + return false; + + // Check out all of the potentially constant arguments. Note that we don't + // inspect varargs here. + CallSite::arg_iterator AI = CS.arg_begin(); + Function::arg_iterator Arg = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; + ++i, ++AI, ++Arg) { + + // If this argument is known non-constant, ignore it. + if (ArgumentConstants[i].second) + continue; + + Constant *C = dyn_cast<Constant>(*AI); + if (C && ArgumentConstants[i].first == nullptr) { + ArgumentConstants[i].first = C; // First constant seen. + } else if (C && ArgumentConstants[i].first == C) { + // Still the constant value we think it is. + } else if (*AI == &*Arg) { + // Ignore recursive calls passing argument down. 
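+        // (For instance, @f passing its own parameter %x straight into a
+        // recursive call of @f does not make %x non-constant; illustrative.)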
+ } else { + // Argument became non-constant. If all arguments are non-constant now, + // give up on this function. + if (++NumNonconstant == ArgumentConstants.size()) + return false; + ArgumentConstants[i].second = true; + } + } + } + + // If we got to this point, there is a constant argument! + assert(NumNonconstant != ArgumentConstants.size()); + bool MadeChange = false; + Function::arg_iterator AI = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { + // Do we have a constant argument? + if (ArgumentConstants[i].second || AI->use_empty() || + AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory())) + continue; + + Value *V = ArgumentConstants[i].first; + if (!V) V = UndefValue::get(AI->getType()); + AI->replaceAllUsesWith(V); + ++NumArgumentsProped; + MadeChange = true; + } + return MadeChange; +} + + +// Check to see if this function returns one or more constants. If so, replace +// all callers that use those return values with the constant value. This will +// leave in the actual return values and instructions, but deadargelim will +// clean that up. +// +// Additionally if a function always returns one of its arguments directly, +// callers will be updated to use the value they pass in directly instead of +// using the return value. +bool IPCP::PropagateConstantReturn(Function &F) { + if (F.getReturnType()->isVoidTy()) + return false; // No return value. + + // If this function could be overridden later in the link stage, we can't + // propagate information about its results into callers. + if (F.mayBeOverridden()) + return false; + + // Check to see if this function returns a constant. + SmallVector<Value *,4> RetVals; + StructType *STy = dyn_cast<StructType>(F.getReturnType()); + if (STy) + for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) + RetVals.push_back(UndefValue::get(STy->getElementType(i))); + else + RetVals.push_back(UndefValue::get(F.getReturnType())); + + unsigned NumNonConstant = 0; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + for (unsigned i = 0, e = RetVals.size(); i != e; ++i) { + // Already found conflicting return values? + Value *RV = RetVals[i]; + if (!RV) + continue; + + // Find the returned value + Value *V; + if (!STy) + V = RI->getOperand(0); + else + V = FindInsertedValue(RI->getOperand(0), i); + + if (V) { + // Ignore undefs, we can change them into anything + if (isa<UndefValue>(V)) + continue; + + // Try to see if all the rets return the same constant or argument. + if (isa<Constant>(V) || isa<Argument>(V)) { + if (isa<UndefValue>(RV)) { + // No value found yet? Try the current one. + RetVals[i] = V; + continue; + } + // Returning the same value? Good. + if (RV == V) + continue; + } + } + // Different or no known return value? Don't propagate this return + // value. + RetVals[i] = nullptr; + // All values non-constant? Stop looking. + if (++NumNonConstant == RetVals.size()) + return false; + } + } + + // If we got here, the function returns at least one constant value. Loop + // over all users, replacing any uses of the return value with the returned + // constant. + bool MadeChange = false; + for (Use &U : F.uses()) { + CallSite CS(U.getUser()); + Instruction* Call = CS.getInstruction(); + + // Not a call instruction or a call instruction that's not calling F + // directly? + if (!Call || !CS.isCallee(&U)) + continue; + + // Call result not used? 
+ if (Call->use_empty()) + continue; + + MadeChange = true; + + if (!STy) { + Value* New = RetVals[0]; + if (Argument *A = dyn_cast<Argument>(New)) + // Was an argument returned? Then find the corresponding argument in + // the call instruction and use that. + New = CS.getArgument(A->getArgNo()); + Call->replaceAllUsesWith(New); + continue; + } + + for (auto I = Call->user_begin(), E = Call->user_end(); I != E;) { + Instruction *Ins = cast<Instruction>(*I); + + // Increment now, so we can remove the use + ++I; + + // Find the index of the retval to replace with + int index = -1; + if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins)) + if (EV->hasIndices()) + index = *EV->idx_begin(); + + // If this use uses a specific return value, and we have a replacement, + // replace it. + if (index != -1) { + Value *New = RetVals[index]; + if (New) { + if (Argument *A = dyn_cast<Argument>(New)) + // Was an argument returned? Then find the corresponding argument in + // the call instruction and use that. + New = CS.getArgument(A->getArgNo()); + Ins->replaceAllUsesWith(New); + Ins->eraseFromParent(); + } + } + } + } + + if (MadeChange) ++NumReturnValProped; + return MadeChange; +} diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp new file mode 100644 index 0000000..7ea6c08 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -0,0 +1,118 @@ +//===-- IPO.cpp -----------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the common infrastructure (including C bindings) for +// libLLVMIPO.a, which implements several transformations over the LLVM +// intermediate representation. 
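+//
+// A minimal usage sketch of the C bindings (assuming an existing
+// LLVMModuleRef M):
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddGlobalOptimizerPass(PM);
+//   LLVMAddIPConstantPropagationPass(PM);
+//   LLVMRunPassManager(PM, M);
+//   LLVMDisposePassManager(PM);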
+// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Initialization.h" +#include "llvm-c/Transforms/IPO.h" +#include "llvm/InitializePasses.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/IPO.h" + +using namespace llvm; + +void llvm::initializeIPO(PassRegistry &Registry) { + initializeArgPromotionPass(Registry); + initializeConstantMergePass(Registry); + initializeCrossDSOCFIPass(Registry); + initializeDAEPass(Registry); + initializeDAHPass(Registry); + initializeForceFunctionAttrsLegacyPassPass(Registry); + initializeFunctionAttrsPass(Registry); + initializeGlobalDCEPass(Registry); + initializeGlobalOptPass(Registry); + initializeIPCPPass(Registry); + initializeAlwaysInlinerPass(Registry); + initializeSimpleInlinerPass(Registry); + initializeInferFunctionAttrsLegacyPassPass(Registry); + initializeInternalizePassPass(Registry); + initializeLoopExtractorPass(Registry); + initializeBlockExtractorPassPass(Registry); + initializeSingleLoopExtractorPass(Registry); + initializeLowerBitSetsPass(Registry); + initializeMergeFunctionsPass(Registry); + initializePartialInlinerPass(Registry); + initializePruneEHPass(Registry); + initializeStripDeadPrototypesLegacyPassPass(Registry); + initializeStripSymbolsPass(Registry); + initializeStripDebugDeclarePass(Registry); + initializeStripDeadDebugInfoPass(Registry); + initializeStripNonDebugSymbolsPass(Registry); + initializeBarrierNoopPass(Registry); + initializeEliminateAvailableExternallyPass(Registry); + initializeSampleProfileLoaderPass(Registry); + initializeFunctionImportPassPass(Registry); +} + +void LLVMInitializeIPO(LLVMPassRegistryRef R) { + initializeIPO(*unwrap(R)); +} + +void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createArgumentPromotionPass()); +} + +void LLVMAddConstantMergePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createConstantMergePass()); +} + +void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDeadArgEliminationPass()); +} + +void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createFunctionAttrsPass()); +} + +void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createFunctionInliningPass()); +} + +void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(llvm::createAlwaysInlinerPass()); +} + +void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGlobalDCEPass()); +} + +void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGlobalOptimizerPass()); +} + +void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIPConstantPropagationPass()); +} + +void LLVMAddPruneEHPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createPruneEHPass()); +} + +void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIPSCCPPass()); +} + +void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { + std::vector<const char *> Export; + if (AllButMain) + Export.push_back("main"); + unwrap(PM)->add(createInternalizePass(Export)); +} + +void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createStripDeadPrototypesPass()); +} + +void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createStripSymbolsPass()); +} diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp new file mode 100644 index 0000000..d02c861 --- /dev/null +++ 
b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -0,0 +1,937 @@ +//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "inferattrs" + +STATISTIC(NumReadNone, "Number of functions inferred as readnone"); +STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); +STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); +STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); +STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); + +static bool setDoesNotAccessMemory(Function &F) { + if (F.doesNotAccessMemory()) + return false; + F.setDoesNotAccessMemory(); + ++NumReadNone; + return true; +} + +static bool setOnlyReadsMemory(Function &F) { + if (F.onlyReadsMemory()) + return false; + F.setOnlyReadsMemory(); + ++NumReadOnly; + return true; +} + +static bool setDoesNotThrow(Function &F) { + if (F.doesNotThrow()) + return false; + F.setDoesNotThrow(); + ++NumNoUnwind; + return true; +} + +static bool setDoesNotCapture(Function &F, unsigned n) { + if (F.doesNotCapture(n)) + return false; + F.setDoesNotCapture(n); + ++NumNoCapture; + return true; +} + +static bool setOnlyReadsMemory(Function &F, unsigned n) { + if (F.onlyReadsMemory(n)) + return false; + F.setOnlyReadsMemory(n); + ++NumReadOnlyArg; + return true; +} + +static bool setDoesNotAlias(Function &F, unsigned n) { + if (F.doesNotAlias(n)) + return false; + F.setDoesNotAlias(n); + ++NumNoAlias; + return true; +} + +/// Analyze the name and prototype of the given function and set any applicable +/// attributes. +/// +/// Returns true if any attributes were set and false otherwise. 
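+///
+/// For example (illustrative only): given nothing but the declaration
+///   declare i64 @strlen(i8*)
+/// the switch below marks the function 'readonly' and 'nounwind' and its
+/// first argument 'nocapture', as if the declaration had been written
+///   declare i64 @strlen(i8* nocapture) nounwind readonly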
+static bool inferPrototypeAttributes(Function &F, + const TargetLibraryInfo &TLI) { + if (F.hasFnAttribute(Attribute::OptimizeNone)) + return false; + + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc))) + return false; + + bool Changed = false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strcmp: // 0,1 + case LibFunc::strspn: // 0,1 + case LibFunc::strncmp: // 0,1 + case LibFunc::strcspn: // 0,1 + case LibFunc::strcoll: // 0,1 + case LibFunc::strcasecmp: // 0,1 + case LibFunc::strncasecmp: // + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || 
!FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::sscanf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::sprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::system: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. 
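+    // (Hence, unlike most entries in this switch, setDoesNotThrow is
+    // deliberately not applied here, nor for the other cancellation points
+    // handled below such as read, write, open, pread, and pwrite.)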
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::mkdir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::read: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::rewind: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::rmdir: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::rename: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::chmod: + case LibFunc::chown: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::access: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= 
setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F); + return Changed; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 3); + return Changed; + case LibFunc::fread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fputs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fscanf: + case LibFunc::fprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= 
setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::gets: + case LibFunc::getchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::uname: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::unlink: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::pread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pread" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pwrite" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vsscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAccessMemory(F); + return Changed; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return 
false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::gettimeofday: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + // Currently some platforms have the restrict keyword on the arguments to + // gettimeofday. To be conservative, do not add noalias to gettimeofday's + // arguments. + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + + default: + // FIXME: It'd be really nice to cover all the library functions we're + // aware of here. + return false; + } +} + +static bool inferAllPrototypeAttributes(Module &M, + const TargetLibraryInfo &TLI) { + bool Changed = false; + + for (Function &F : M.functions()) + // We only infer things using the prototype if the definition isn't around + // to analyze directly. + if (F.isDeclaration()) + Changed |= inferPrototypeAttributes(F, TLI); + + return Changed; +} + +PreservedAnalyses InferFunctionAttrsPass::run(Module &M, + AnalysisManager<Module> *AM) { + auto &TLI = AM->getResult<TargetLibraryAnalysis>(M); + + if (!inferAllPrototypeAttributes(M, TLI)) + // If we didn't infer anything, preserve all analyses. + return PreservedAnalyses::all(); + + // Otherwise, we may have changed fundamental function attributes, so clear + // out all the passes. 
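+  //
+  // (Illustrative sketch only; the exact new-pass-manager plumbing may
+  // differ:) a client would typically schedule this as
+  //   ModulePassManager MPM;
+  //   MPM.addPass(InferFunctionAttrsPass());
+  //   PreservedAnalyses PA = MPM.run(M, &AM);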
+ return PreservedAnalyses::none(); +} + +namespace { +struct InferFunctionAttrsLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + InferFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeInferFunctionAttrsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + } + + bool runOnModule(Module &M) override { + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + return inferAllPrototypeAttributes(M, TLI); + } +}; +} + +char InferFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) + +Pass *llvm::createInferFunctionAttrsLegacyPass() { + return new InferFunctionAttrsLegacyPass(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp new file mode 100644 index 0000000..1704bfe --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp @@ -0,0 +1,100 @@ +//===- InlineAlways.cpp - Code to inline always_inline functions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a custom inliner that handles only functions that +// are marked as "always inline". +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/IPO/InlinerPass.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline" + +namespace { + +/// \brief Inliner pass which only handles "always inline" functions. +class AlwaysInliner : public Inliner { + +public: + // Use extremely low threshold. 
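+  // (The -2000000000 below is only a sentinel: getInlineCost is overridden
+  // to return getAlways() or getNever(), so no ordinary cost estimate is
+  // ever compared against this threshold.)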
+  AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
+    initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  AlwaysInliner(bool InsertLifetime)
+      : Inliner(ID, -2000000000, InsertLifetime) {
+    initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  static char ID; // Pass identification, replacement for typeid
+
+  InlineCost getInlineCost(CallSite CS) override;
+
+  using llvm::Pass::doFinalization;
+  bool doFinalization(CallGraph &CG) override {
+    return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
+  }
+};
+
+}
+
+char AlwaysInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false)
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
+  return new AlwaysInliner(InsertLifetime);
+}
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note, it would be possible to go to some lengths to cache the information
+/// computed here, but as we only expect to do this for relatively few and
+/// small functions which have the explicit attribute to force inlining, it is
+/// likely not worth it in practice.
+InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
+  Function *Callee = CS.getCalledFunction();
+
+  // Only inline direct calls to functions with always-inline attributes
+  // that are viable for inlining. FIXME: We shouldn't even get here for
+  // declarations.
+  if (Callee && !Callee->isDeclaration() &&
+      CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
+    return InlineCost::getAlways();
+
+  return InlineCost::getNever();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 0000000..45609f8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,110 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into their callers.
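+//
+// (Illustrative:) createFunctionInliningPass(OptLevel, SizeOptLevel) below
+// picks the cost threshold via computeThresholdFromOptLevels: -O3 yields
+// 275, -Os (SizeOptLevel == 1) yields 75, -Oz (SizeOptLevel == 2) yields
+// 25, and everything else gets the default of 225.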
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/InlinerPass.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline" + +namespace { + +/// \brief Actual inliner pass implementation. +/// +/// The common implementation of the inlining logic is shared between this +/// inliner pass and the always inliner pass. The two passes use different cost +/// analyses to determine when to inline. +class SimpleInliner : public Inliner { + +public: + SimpleInliner() : Inliner(ID) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + + SimpleInliner(int Threshold) + : Inliner(ID, Threshold, /*InsertLifetime*/ true) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + + static char ID; // Pass identification, replacement for typeid + + InlineCost getInlineCost(CallSite CS) override { + Function *Callee = CS.getCalledFunction(); + TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); + return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT); + } + + bool runOnSCC(CallGraphSCC &SCC) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + TargetTransformInfoWrapperPass *TTIWP; +}; + +static int computeThresholdFromOptLevels(unsigned OptLevel, + unsigned SizeOptLevel) { + if (OptLevel > 2) + return 275; + if (SizeOptLevel == 1) // -Os + return 75; + if (SizeOptLevel == 2) // -Oz + return 25; + return 225; +} + +} // end anonymous namespace + +char SimpleInliner::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) + +Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } + +Pass *llvm::createFunctionInliningPass(int Threshold) { + return new SimpleInliner(Threshold); +} + +Pass *llvm::createFunctionInliningPass(unsigned OptLevel, + unsigned SizeOptLevel) { + return new SimpleInliner( + computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); +} + +bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { + TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); + return Inliner::runOnSCC(SCC); +} + +void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); + Inliner::getAnalysisUsage(AU); +} diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp new file mode 100644 index 0000000..bbe5f876 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -0,0 +1,741 @@ +//===- Inliner.cpp - Code common to all inliners --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls while updating the call graph. The decisions about which
+// calls are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "inline"
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+
+// This weirdly named statistic tracks the number of times that, when
+// attempting to inline a function A into B, we analyze B's callers to see
+// whether inlining into *them* would be more profitable and would be blocked
+// by this inline step.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+        cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int>
+HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
+              cl::desc("Threshold for inlining functions with inline hint"));
+
+// We introduce this threshold to help performance of instrumentation-based
+// PGO before we actually hook up the inliner with analysis passes such as
+// BPI and BFI.
+static cl::opt<int>
+ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
+              cl::desc("Threshold for inlining functions with cold attribute"));
+
+// Threshold to use when optsize is specified (and there is no
+// -inline-threshold).
+const int OptSizeThreshold = 75;
+
+Inliner::Inliner(char &ID)
+    : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
+}
+
+Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
+    : CallGraphSCCPass(ID),
+      InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
+                                                          : Threshold),
+      InsertLifetime(InsertLifetime) {}
+
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+  CallGraphSCCPass::getAnalysisUsage(AU);
+}
+
+
+typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR.
The +/// InlinedArrayAllocas map keeps track of any allocas that are already +/// available from other functions inlined into the caller. If we are able to +/// inline this call site we attempt to reuse already available allocas or add +/// any new allocas to the set if not possible. +static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory, bool InsertLifetime) { + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + + // We need to manually construct BasicAA directly in order to disable + // its use of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. + AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR)); + + // Try to inline the function. Get the list of static allocas that were + // inlined. + if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) + return false; + + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); + + // Look at all of the allocas that we inlined through this call site. If we + // have already inlined other allocas through other calls into this function, + // then we know that they have disjoint lifetimes and that we can merge them. + // + // There are many heuristics possible for merging these allocas, and the + // different options have different tradeoffs. One thing that we *really* + // don't want to hurt is SRoA: once inlining happens, often allocas are no + // longer address taken and so they can be promoted. + // + // Our "solution" for that is to only merge allocas whose outermost type is an + // array type. These are usually not promoted because someone is using a + // variable index into them. These are also often the most important ones to + // merge. + // + // A better solution would be to have real memory lifetime markers in the IR + // and not have the inliner do any merging of allocas at all. This would + // allow the backend to do proper stack slot coloring of all allocas that + // *actually make it to the backend*, which is really what we want. + // + // Because we don't have this information, we do this simple and useful hack. + // + SmallPtrSet<AllocaInst*, 16> UsedAllocas; + + // When processing our SCC, check to see if CS was inlined from some other + // call site. For example, if we're processing "A" in this code: + // A() { B() } + // B() { x = alloca ... C() } + // C() { y = alloca ... } + // Assume that C was not inlined into B initially, and so we're processing A + // and decide to inline B into A. Doing this makes an alloca available for + // reuse and makes a callsite (C) available for inlining. When we process + // the C call site we don't want to do any alloca merging between X and Y + // because their scopes are not disjoint. We could make this smarter by + // keeping track of the inline history for each alloca in the + // InlinedArrayAllocas but this isn't likely to be a significant win. + if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. + return true; + + // Loop over all the allocas we have so far and see if they can be merged with + // a previously inlined alloca. If not, remember that we had it. 
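+  // (Illustrative:) if an earlier inline into this caller contributed
+  //   %buf = alloca [16 x i32]
+  // and this inline step contributes another static alloca of type
+  // [16 x i32], the loop below RAUWs the new alloca to %buf rather than
+  // keeping both, since their lifetimes are known to be disjoint.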
+  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
+       AllocaNo != e; ++AllocaNo) {
+    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+    // Don't bother trying to merge array allocations (they will usually be
+    // canonicalized to be an allocation *of* an array), or allocations whose
+    // type is not itself an array (because we're afraid of pessimizing SRoA).
+    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+    if (!ATy || AI->isArrayAllocation())
+      continue;
+
+    // Get the list of all available allocas for this array type.
+    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+    // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined, so we use the
+    // 'UsedAllocas' set to keep track of which "available" allocas are being
+    // used by this function. Also, AllocasForType can be empty of course!
+    bool MergedAwayAlloca = false;
+    for (AllocaInst *AvailableAlloca : AllocasForType) {
+
+      unsigned Align1 = AI->getAlignment(),
+               Align2 = AvailableAlloca->getAlignment();
+
+      // The available alloca has to be in the right function, not in some
+      // other function in this SCC.
+      if (AvailableAlloca->getParent() != AI->getParent())
+        continue;
+
+      // If the inlined function already uses this alloca then we can't reuse
+      // it.
+      if (!UsedAllocas.insert(AvailableAlloca).second)
+        continue;
+
+      // Otherwise, we *can* reuse it: RAUW AI into AvailableAlloca and declare
+      // success!
+      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+                   << *AvailableAlloca << '\n');
+
+      // Move affected dbg.declare calls immediately after the new alloca to
+      // avoid the situation when a dbg.declare precedes its alloca.
+      if (auto *L = LocalAsMetadata::getIfExists(AI))
+        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+          for (User *U : MDV->users())
+            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
+              DDI->moveBefore(AvailableAlloca->getNextNode());
+
+      AI->replaceAllUsesWith(AvailableAlloca);
+
+      if (Align1 != Align2) {
+        if (!Align1 || !Align2) {
+          const DataLayout &DL = Caller->getParent()->getDataLayout();
+          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
+
+          Align1 = Align1 ? Align1 : TypeAlign;
+          Align2 = Align2 ? Align2 : TypeAlign;
+        }
+
+        if (Align1 > Align2)
+          AvailableAlloca->setAlignment(AI->getAlignment());
+      }
+
+      AI->eraseFromParent();
+      MergedAwayAlloca = true;
+      ++NumMergedAllocas;
+      IFI.StaticAllocas[AllocaNo] = nullptr;
+      break;
+    }
+
+    // If we already nuked the alloca, we're done with it.
+    if (MergedAwayAlloca)
+      continue;
+
+    // If we were unable to merge away the alloca either because there are no
+    // allocas of the right type available or because we reused them all
+    // already, remember that this alloca came from an inlined function and
+    // mark it used so we don't reuse it for other allocas from this inline
+    // operation.
+    AllocasForType.push_back(AI);
+    UsedAllocas.insert(AI);
+  }
+
+  return true;
+}
+
+unsigned Inliner::getInlineThreshold(CallSite CS) const {
+  int Threshold = InlineThreshold; // -inline-threshold or else selected by
+                                   // overall opt level
+
+  // If -inline-threshold is not given, listen to the optsize attribute when it
+  // would decrease the threshold.
+  Function *Caller = CS.getCaller();
+  bool OptSize = Caller && !Caller->isDeclaration() &&
+                 // FIXME: Use Function::optForSize().
+ Caller->hasFnAttribute(Attribute::OptimizeForSize); + if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && + OptSizeThreshold < Threshold) + Threshold = OptSizeThreshold; + + Function *Callee = CS.getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + return Threshold; + + // If profile information is available, use that to adjust threshold of hot + // and cold functions. + // FIXME: The heuristic used below for determining hotness and coldness are + // based on preliminary SPEC tuning and may not be optimal. Replace this with + // a well-tuned heuristic based on *callsite* hotness and not callee hotness. + uint64_t FunctionCount = 0, MaxFunctionCount = 0; + bool HasPGOCounts = false; + if (Callee->getEntryCount() && + Callee->getParent()->getMaximumFunctionCount()) { + HasPGOCounts = true; + FunctionCount = Callee->getEntryCount().getValue(); + MaxFunctionCount = + Callee->getParent()->getMaximumFunctionCount().getValue(); + } + + // Listen to the inlinehint attribute or profile based hotness information + // when it would increase the threshold and the caller does not need to + // minimize its size. + bool InlineHint = + Callee->hasFnAttribute(Attribute::InlineHint) || + (HasPGOCounts && + FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)); + if (InlineHint && HintThreshold > Threshold && + !Caller->hasFnAttribute(Attribute::MinSize)) + Threshold = HintThreshold; + + // Listen to the cold attribute or profile based coldness information + // when it would decrease the threshold. + bool ColdCallee = + Callee->hasFnAttribute(Attribute::Cold) || + (HasPGOCounts && + FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)); + // Command line argument for InlineLimit will override the default + // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, + // do not use the default cold threshold even if it is smaller. + if ((InlineLimit.getNumOccurrences() == 0 || + ColdThreshold.getNumOccurrences() > 0) && ColdCallee && + ColdThreshold < Threshold) + Threshold = ColdThreshold; + + return Threshold; +} + +static void emitAnalysis(CallSite CS, const Twine &Msg) { + Function *Caller = CS.getCaller(); + LLVMContext &Ctx = Caller->getContext(); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); +} + +/// Return true if the inliner should attempt to inline at the given CallSite. 
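+///
+/// (Illustrative:) a candidate whose InlineCost works out to cost=120
+/// against a threshold of 225 (recovered below as IC.getCostDelta() +
+/// IC.getCost()) passes the boolean test on IC and is accepted; the same
+/// call with cost=300 would be rejected as too costly.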
+bool Inliner::shouldInline(CallSite CS) { + InlineCost IC = getInlineCost(CS); + + if (IC.isAlways()) { + DEBUG(dbgs() << " Inlining: cost=always" + << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) + + " should always be inlined (cost=always)"); + return true; + } + + if (IC.isNever()) { + DEBUG(dbgs() << " NOT Inlining: cost=never" + << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " should never be inlined (cost=never)")); + return false; + } + + Function *Caller = CS.getCaller(); + if (!IC) { + DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() + << ", thres=" << (IC.getCostDelta() + IC.getCost()) + << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " too costly to inline (cost=") + + Twine(IC.getCost()) + ", threshold=" + + Twine(IC.getCostDelta() + IC.getCost()) + ")"); + return false; + } + + // Try to detect the case where the current inlining candidate caller (call + // it B) is a static or linkonce-ODR function and is an inlining candidate + // elsewhere, and the current candidate callee (call it C) is large enough + // that inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B into + // its callers. + // + // This only applies to static and linkonce-ODR functions because those are + // expected to be available for inlining in the translation units where they + // are used. Thus we will always have the opportunity to make local inlining + // decisions. Importantly the linkonce-ODR linkage covers inline functions + // and templates in C++. + // + // FIXME: All of this logic should be sunk into getInlineCost. It relies on + // the internal implementation of the inline cost metrics rather than + // treating them as truly abstract units etc. + if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) { + int TotalSecondaryCost = 0; + // The candidate cost to be imposed upon the current function. + int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); + // This bool tracks what happens if we do NOT inline C into B. + bool callerWillBeRemoved = Caller->hasLocalLinkage(); + // This bool tracks what happens if we DO inline C into B. + bool inliningPreventsSomeOuterInline = false; + for (User *U : Caller->users()) { + CallSite CS2(U); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2.getCalledFunction() != Caller) { + callerWillBeRemoved = false; + continue; + } + + InlineCost IC2 = getInlineCost(CS2); + ++NumCallerCallersAnalyzed; + if (!IC2) { + callerWillBeRemoved = false; + continue; + } + if (IC2.isAlways()) + continue; + + // See if inlining or original callsite would erase the cost delta of + // this callsite. We subtract off the penalty for the call instruction, + // which we would be deleting. + if (IC2.getCostDelta() <= CandidateCost) { + inliningPreventsSomeOuterInline = true; + TotalSecondaryCost += IC2.getCost(); + } + } + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. 
+ if (callerWillBeRemoved && !Caller->use_empty()) + TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; + + if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) { + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << + " Cost = " << IC.getCost() << + ", outer Cost = " << TotalSecondaryCost << '\n'); + emitAnalysis( + CS, Twine("Not inlining. Cost of inlining " + + CS.getCalledFunction()->getName() + + " increases the cost of inlining " + + CS.getCaller()->getName() + " in other contexts")); + return false; + } + } + + DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() + << ", thres=" << (IC.getCostDelta() + IC.getCost()) + << ", Call: " << *CS.getInstruction() << '\n'); + emitAnalysis( + CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") + + CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) + + " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")"); + return true; +} + +/// Return true if the specified inline history ID +/// indicates an inline history that includes the specified function. +static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, + const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { + while (InlineHistoryID != -1) { + assert(unsigned(InlineHistoryID) < InlineHistory.size() && + "Invalid inline history ID"); + if (InlineHistory[InlineHistoryID].first == F) + return true; + InlineHistoryID = InlineHistory[InlineHistoryID].second; + } + return false; +} + +bool Inliner::runOnSCC(CallGraphSCC &SCC) { + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + ACT = &getAnalysis<AssumptionCacheTracker>(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + + SmallPtrSet<Function*, 8> SCCFunctions; + DEBUG(dbgs() << "Inliner visiting SCC:"); + for (CallGraphNode *Node : SCC) { + Function *F = Node->getFunction(); + if (F) SCCFunctions.insert(F); + DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); + } + + // Scan through and identify all call sites ahead of time so that we only + // inline call sites in the original functions, not call sites that result + // from inlining other functions. + SmallVector<std::pair<CallSite, int>, 16> CallSites; + + // When inlining a callee produces new call sites, we want to keep track of + // the fact that they were inlined from the callee. This allows us to avoid + // infinite inlining in some obscure cases. To represent this, we use an + // index into the InlineHistory vector. + SmallVector<std::pair<Function*, int>, 8> InlineHistory; + + for (CallGraphNode *Node : SCC) { + Function *F = Node->getFunction(); + if (!F) continue; + + for (BasicBlock &BB : *F) + for (Instruction &I : BB) { + CallSite CS(cast<Value>(&I)); + // If this isn't a call, or it is a call to an intrinsic, it can + // never be inlined. + if (!CS || isa<IntrinsicInst>(I)) + continue; + + // If this is a direct call to an external function, we can never inline + // it. If it is an indirect call, inlining may resolve it to be a + // direct call, so we keep it. + if (Function *Callee = CS.getCalledFunction()) + if (Callee->isDeclaration()) + continue; + + CallSites.push_back(std::make_pair(CS, -1)); + } + } + + DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); + + // If there are no calls in this function, exit early. + if (CallSites.empty()) + return false; + + // Now that we have all of the call sites, move the ones to functions in the + // current SCC to the end of the list. 
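+  // (Illustrative:) given CallSites = [f, g, h] where only g is defined in
+  // this SCC, the swap loop below leaves the non-SCC calls [f, h] in front
+  // of g, with FirstCallInSCC marking the boundary between the two groups.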
+  unsigned FirstCallInSCC = CallSites.size();
+  for (unsigned i = 0; i < FirstCallInSCC; ++i)
+    if (Function *F = CallSites[i].first.getCalledFunction())
+      if (SCCFunctions.count(F))
+        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+
+  InlinedArrayAllocasTy InlinedArrayAllocas;
+  InlineFunctionInfo InlineInfo(&CG, ACT);
+
+  // Now that we have all of the call sites, loop over them and inline them if
+  // it looks profitable to do so.
+  bool Changed = false;
+  bool LocalChange;
+  do {
+    LocalChange = false;
+    // We keep iterating in the outer do-while loop because inlining functions
+    // can cause indirect calls to become direct calls, exposing new
+    // candidates.
+    // CallSites may be modified inside the loop, so a range-based for loop
+    // cannot be used.
+    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+      CallSite CS = CallSites[CSi].first;
+
+      Function *Caller = CS.getCaller();
+      Function *Callee = CS.getCalledFunction();
+
+      // If this call site is dead and it is to a readonly function, we should
+      // just delete the call instead of trying to inline it, regardless of
+      // size. This happens because IPSCCP propagates the result out of the
+      // call and then we're left with the dead call.
+      if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
+        DEBUG(dbgs() << "    -> Deleting dead call: "
+                     << *CS.getInstruction() << "\n");
+        // Update the call graph by deleting the edge from Callee to Caller.
+        CG[Caller]->removeCallEdgeFor(CS);
+        CS.getInstruction()->eraseFromParent();
+        ++NumCallsDeleted;
+      } else {
+        // We can only inline direct calls to non-declarations.
+        if (!Callee || Callee->isDeclaration()) continue;
+
+        // If this call site was obtained by inlining another function, verify
+        // that the inline history for the call site does not include the
+        // callee itself. If it did, we would be recursively inlining the same
+        // function, which would expose the same call sites again and cause us
+        // to inline infinitely.
+        int InlineHistoryID = CallSites[CSi].second;
+        if (InlineHistoryID != -1 &&
+            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+          continue;
+
+        LLVMContext &CallerCtx = Caller->getContext();
+
+        // Get the DebugLoc to report; CS will be invalid after inlining.
+        DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+
+        // If the policy determines that we should inline this function,
+        // try to do so.
+        if (!shouldInline(CS)) {
+          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+                                       Twine(Callee->getName() +
+                                             " will not be inlined into " +
+                                             Caller->getName()));
+          continue;
+        }
+
+        // Attempt to inline the function.
+        if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
+                                  InlineHistoryID, InsertLifetime)) {
+          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+                                       Twine(Callee->getName() +
+                                             " will not be inlined into " +
+                                             Caller->getName()));
+          continue;
+        }
+        ++NumInlined;
+
+        // Report the inline decision.
+        emitOptimizationRemark(
+            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+            Twine(Callee->getName() + " inlined into " + Caller->getName()));
+
+        // If inlining this function gave us any new call sites, throw them
+        // onto our worklist to process. They are useful inline candidates.
+        if (!InlineInfo.InlinedCalls.empty()) {
+          // Create a new inline history entry for this, so that we remember
+          // that these new callsites came about due to inlining Callee.
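+          // Illustrative (hypothetical functions): if inlining f into g
+          // created entry 0 = {f, -1}, and inlining g into h now exposes call
+          // sites that originally lived in f, those sites get a new entry
+          // {g, 0}. Walking that chain yields {g, f}, which is exactly what
+          // InlineHistoryIncludes consults to block recursive re-inlining.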
+          int NewHistoryID = InlineHistory.size();
+          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+          for (Value *Ptr : InlineInfo.InlinedCalls)
+            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+        }
+      }
+
+      // If we inlined or deleted the last possible call site to the function,
+      // delete the function body now.
+      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
+          // TODO: Can remove if in SCC now.
+          !SCCFunctions.count(Callee) &&
+
+          // The function may be apparently dead, but if there are indirect
+          // callgraph references to the node, we cannot delete it yet, this
+          // could invalidate the CGSCC iterator.
+          CG[Callee]->getNumReferences() == 0) {
+        DEBUG(dbgs() << "    -> Deleting dead function: "
+                     << Callee->getName() << "\n");
+        CallGraphNode *CalleeNode = CG[Callee];
+
+        // Remove any call graph edges from the callee to its callees.
+        CalleeNode->removeAllCalledFunctions();
+
+        // Remove the node for the callee from the call graph and delete it.
+        delete CG.removeFunctionFromModule(CalleeNode);
+        ++NumDeleted;
+      }
+
+      // Remove this call site from the list. If possible, use
+      // swap/pop_back for efficiency, but do not use it if doing so would
+      // move a call site to a function in this SCC before the
+      // 'FirstCallInSCC' barrier.
+      if (SCC.isSingular()) {
+        CallSites[CSi] = CallSites.back();
+        CallSites.pop_back();
+      } else {
+        CallSites.erase(CallSites.begin()+CSi);
+      }
+      --CSi;
+
+      Changed = true;
+      LocalChange = true;
+    }
+  } while (LocalChange);
+
+  return Changed;
+}
+
+/// Remove now-dead linkonce functions at the end of
+/// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+  return removeDeadFunctions(CG);
+}
+
+/// Remove dead functions that are not included in the DNR (Do Not Remove)
+/// list.
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+  SmallVector<CallGraphNode*, 16> FunctionsToRemove;
+  SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
+  SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
+
+  auto RemoveCGN = [&](CallGraphNode *CGN) {
+    // Remove any call graph edges from the function to its callees.
+    CGN->removeAllCalledFunctions();
+
+    // Remove any edges from the external node to the function's call graph
+    // node. These edges might have been made irrelevant due to
+    // optimization of the program.
+    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+    // Queue the node; it is removed from the module and deleted below.
+    FunctionsToRemove.push_back(CGN);
+  };
+
+  // Scan for all of the functions, looking for ones that should now be removed
+  // from the program. Insert the dead ones in the FunctionsToRemove set.
+  for (const auto &I : CG) {
+    CallGraphNode *CGN = I.second.get();
+    Function *F = CGN->getFunction();
+    if (!F || F->isDeclaration())
+      continue;
+
+    // If we only care about always-inline functions, skip the others. This is
+    // a bit of a hack to share code between here and the InlineAlways pass.
+    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
+      continue;
+
+    // If the only remaining users of the function are dead constants, remove
+    // them.
+    F->removeDeadConstantUsers();
+
+    if (!F->isDefTriviallyDead())
+      continue;
+
+    // It is unsafe to drop a function with discardable linkage from a COMDAT
+    // without also dropping the other members of the COMDAT.
+    // The inliner doesn't visit non-function entities which are in COMDAT
+    // groups, so it is unsafe to do so *unless* the linkage is local.
+    if (!F->hasLocalLinkage()) {
+      if (const Comdat *C = F->getComdat()) {
+        --ComdatEntriesAlive[C];
+        DeadFunctionsInComdats.push_back(CGN);
+        continue;
+      }
+    }
+
+    RemoveCGN(CGN);
+  }
+  if (!DeadFunctionsInComdats.empty()) {
+    // Count up all the entities in COMDAT groups.
+    auto ComdatGroupReferenced = [&](const Comdat *C) {
+      auto I = ComdatEntriesAlive.find(C);
+      if (I != ComdatEntriesAlive.end())
+        ++(I->getSecond());
+    };
+    for (const Function &F : CG.getModule())
+      if (const Comdat *C = F.getComdat())
+        ComdatGroupReferenced(C);
+    for (const GlobalVariable &GV : CG.getModule().globals())
+      if (const Comdat *C = GV.getComdat())
+        ComdatGroupReferenced(C);
+    for (const GlobalAlias &GA : CG.getModule().aliases())
+      if (const Comdat *C = GA.getComdat())
+        ComdatGroupReferenced(C);
+    for (CallGraphNode *CGN : DeadFunctionsInComdats) {
+      Function *F = CGN->getFunction();
+      const Comdat *C = F->getComdat();
+      int NumAlive = ComdatEntriesAlive[C];
+      // We can remove functions in a COMDAT group if the entire group is dead.
+      assert(NumAlive >= 0);
+      if (NumAlive > 0)
+        continue;
+
+      RemoveCGN(CGN);
+    }
+  }
+
+  if (FunctionsToRemove.empty())
+    return false;
+
+  // Now that we know which functions to delete, do so. We didn't want to do
+  // this inline, because that would invalidate our CallGraph::iterator
+  // objects. :(
+  //
+  // Note that it doesn't matter that we are iterating over a non-stable order
+  // here to do this, it doesn't matter which order the functions are deleted
+  // in.
+  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+  FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+                                      FunctionsToRemove.end()),
+                          FunctionsToRemove.end());
+  for (CallGraphNode *CGN : FunctionsToRemove) {
+    delete CG.removeFunctionFromModule(CGN);
+    ++NumDeleted;
+  }
+  return true;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 0000000..21bb5d0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,269 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions and variables in the input module.
+// If the function or variable is not in the list of external names given to
+// the pass, it is marked as internal.
+//
+// This transformation would not be legal in a regular compilation, but it gets
+// extra information from the linker about what is safe.
+//
+// For example, internalizing a function with external linkage is safe only if
+// the linker tells us that the function is used solely from within this
+// module.
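+//
+// Illustrative invocation (the flag names are the cl::opt strings defined
+// below; the exact opt command line is an assumption, not part of this file):
+//
+//   opt -internalize -internalize-public-api-list=main in.bc -o out.bc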
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+#define DEBUG_TYPE "internalize"
+
+STATISTIC(NumAliases  , "Number of aliases internalized");
+STATISTIC(NumFunctions, "Number of functions internalized");
+STATISTIC(NumGlobals  , "Number of global vars internalized");
+
+// APIFile - A file which contains a list of symbols that should not be marked
+// external.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+        cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+        cl::desc("A list of symbol names to preserve"),
+        cl::CommaSeparated);
+
+namespace {
+  class InternalizePass : public ModulePass {
+    std::set<std::string> ExternalNames;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit InternalizePass();
+    explicit InternalizePass(ArrayRef<const char *> ExportList);
+    void LoadFile(const char *Filename);
+    bool maybeInternalize(GlobalValue &GV,
+                          const std::set<const Comdat *> &ExternalComdats);
+    void checkComdatVisibility(GlobalValue &GV,
+                               std::set<const Comdat *> &ExternalComdats);
+    bool runOnModule(Module &M) override;
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      AU.addPreserved<CallGraphWrapperPass>();
+    }
+  };
+} // end anonymous namespace
+
+char InternalizePass::ID = 0;
+INITIALIZE_PASS(InternalizePass, "internalize",
+                "Internalize Global Symbols", false, false)
+
+InternalizePass::InternalizePass() : ModulePass(ID) {
+  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+  if (!APIFile.empty()) // If a filename is specified, use it.
+    LoadFile(APIFile.c_str());
+  ExternalNames.insert(APIList.begin(), APIList.end());
+}
+
+InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
+    : ModulePass(ID) {
+  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+  for (const char *Name : ExportList)
+    ExternalNames.insert(Name);
+}
+
+void InternalizePass::LoadFile(const char *Filename) {
+  // Load the APIFile...
+  std::ifstream In(Filename);
+  if (!In.good()) {
+    errs() << "WARNING: Internalize couldn't load file '" << Filename
+           << "'! Continuing as if it's empty.\n";
+    return; // Just continue as if the file were empty
+  }
+  while (In) {
+    std::string Symbol;
+    In >> Symbol;
+    if (!Symbol.empty())
+      ExternalNames.insert(Symbol);
+  }
+}
+
+static bool isExternallyVisible(const GlobalValue &GV,
+                                const std::set<std::string> &ExternalNames) {
+  // Function must be defined here.
+  if (GV.isDeclaration())
+    return true;
+
+  // Available externally is really just a "declaration with a body".
+  if (GV.hasAvailableExternallyLinkage())
+    return true;
+
+  // Assume that dllexported symbols are referenced elsewhere.
+  if (GV.hasDLLExportStorageClass())
+    return true;
+
+  // Marked to keep external?
+  if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName()))
+    return true;
+
+  return false;
+}
+
+// Internalize GV if it is possible to do so, i.e. it is not externally visible
+// and is not a member of an externally visible comdat.
+bool InternalizePass::maybeInternalize(
+    GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+  if (Comdat *C = GV.getComdat()) {
+    if (ExternalComdats.count(C))
+      return false;
+
+    // If a comdat is not externally visible we can drop it.
+    if (auto GO = dyn_cast<GlobalObject>(&GV))
+      GO->setComdat(nullptr);
+
+    if (GV.hasLocalLinkage())
+      return false;
+  } else {
+    if (GV.hasLocalLinkage())
+      return false;
+
+    if (isExternallyVisible(GV, ExternalNames))
+      return false;
+  }
+
+  GV.setVisibility(GlobalValue::DefaultVisibility);
+  GV.setLinkage(GlobalValue::InternalLinkage);
+  return true;
+}
+
+// If GV is part of a comdat and is externally visible, keep track of its
+// comdat so that we don't internalize any of its members.
+void InternalizePass::checkComdatVisibility(
+    GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+  Comdat *C = GV.getComdat();
+  if (!C)
+    return;
+
+  if (isExternallyVisible(GV, ExternalNames))
+    ExternalComdats.insert(C);
+}
+
+bool InternalizePass::runOnModule(Module &M) {
+  CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
+  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
+  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
+
+  SmallPtrSet<GlobalValue *, 8> Used;
+  collectUsedGlobalVariables(M, Used, false);
+
+  // Collect comdat visibility information for the module.
+  std::set<const Comdat *> ExternalComdats;
+  if (!M.getComdatSymbolTable().empty()) {
+    for (Function &F : M)
+      checkComdatVisibility(F, ExternalComdats);
+    for (GlobalVariable &GV : M.globals())
+      checkComdatVisibility(GV, ExternalComdats);
+    for (GlobalAlias &GA : M.aliases())
+      checkComdatVisibility(GA, ExternalComdats);
+  }
+
+  // We must assume that globals in llvm.used have a reference that not even
+  // the linker can see, so we don't internalize them.
+  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
+  // linker can drop those symbols. If this pass is running as part of LTO,
+  // one might think that it could just drop llvm.compiler.used. The problem
+  // is that even in LTO llvm doesn't see every reference. For example,
+  // we don't see references from function local inline assembly. To be
+  // conservative, we internalize symbols in llvm.compiler.used, but we
+  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
+  for (GlobalValue *V : Used) {
+    ExternalNames.insert(V->getName());
+  }
+
+  // Mark all functions not in the API as internal.
+  for (Function &I : M) {
+    if (!maybeInternalize(I, ExternalComdats))
+      continue;
+
+    if (ExternalNode)
+      // Remove a callgraph edge from the external node to this function.
+      ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
+
+    ++NumFunctions;
+    DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
+  }
+
+  // Never internalize the llvm.used symbol. It is used to implement
+  // attribute((used)).
+  // FIXME: Shouldn't this just filter on llvm.metadata section??
+  ExternalNames.insert("llvm.used");
+  ExternalNames.insert("llvm.compiler.used");
+
+  // Never internalize anchors used by the machine module info, else the info
+  // won't find them. (see MachineModuleInfo.)
+  ExternalNames.insert("llvm.global_ctors");
+  ExternalNames.insert("llvm.global_dtors");
+  ExternalNames.insert("llvm.global.annotations");
+
+  // Never internalize symbols code-gen inserts.
+  // FIXME: We should probably add this (and the __stack_chk_guard) via some
+  // type of call-back in CodeGen.
+  ExternalNames.insert("__stack_chk_fail");
+  ExternalNames.insert("__stack_chk_guard");
+
+  // Mark all global variables with initializers that are not in the API as
+  // internal as well.
+  for (GlobalVariable &GV : M.globals()) {
+    if (!maybeInternalize(GV, ExternalComdats))
+      continue;
+
+    ++NumGlobals;
+    DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
+  }
+
+  // Mark all aliases that are not in the API as internal as well.
+  for (GlobalAlias &GA : M.aliases()) {
+    if (!maybeInternalize(GA, ExternalComdats))
+      continue;
+
+    ++NumAliases;
+    DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
+  }
+
+  // We do not keep track of whether this pass changed the module because
+  // it adds unnecessary complexity:
+  // 1) This pass will generally be near the start of the pass pipeline, so
+  //    there will be no analyses to invalidate.
+  // 2) This pass will most likely end up changing the module and it isn't
+  //    worth worrying about optimizing the case where the module is
+  //    unchanged.
+  return true;
+}
+
+ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
+
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
+  return new InternalizePass(ExportList);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
new file mode 100644
index 0000000..8e4ad64
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -0,0 +1,310 @@
+//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the ExtractLoop() scalar transformation to extract
+// each top-level loop into its own new function. If the loop is the ONLY loop
+// in a given function, it is not touched. This is a pass most useful for
+// debugging via bugpoint.
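+//
+// Illustrative invocations (the pass names are the INITIALIZE_PASS strings
+// below; the exact opt command lines are an assumption):
+//
+//   opt -loop-extract in.bc -o out.bc        ; extract every top-level loop
+//   opt -loop-extract-single in.bc -o out.bc ; extract at most one loop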
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" +#include <fstream> +#include <set> +using namespace llvm; + +#define DEBUG_TYPE "loop-extract" + +STATISTIC(NumExtracted, "Number of loops extracted"); + +namespace { + struct LoopExtractor : public LoopPass { + static char ID; // Pass identification, replacement for typeid + unsigned NumLoops; + + explicit LoopExtractor(unsigned numLoops = ~0) + : LoopPass(ID), NumLoops(numLoops) { + initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredID(BreakCriticalEdgesID); + AU.addRequiredID(LoopSimplifyID); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + } + }; +} + +char LoopExtractor::ID = 0; +INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) + +namespace { + /// SingleLoopExtractor - For bugpoint. + struct SingleLoopExtractor : public LoopExtractor { + static char ID; // Pass identification, replacement for typeid + SingleLoopExtractor() : LoopExtractor(1) {} + }; +} // End anonymous namespace + +char SingleLoopExtractor::ID = 0; +INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", + "Extract at most one loop into a new function", false, false) + +// createLoopExtractorPass - This pass extracts all natural loops from the +// program into a function if it can. +// +Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } + +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { + if (skipOptnoneFunction(L)) + return false; + + // Only visit top-level loops. + if (L->getParentLoop()) + return false; + + // If LoopSimplify form is not available, stay out of trouble. + if (!L->isLoopSimplifyForm()) + return false; + + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + bool Changed = false; + + // If there is more than one top-level loop in this function, extract all of + // the loops. Otherwise there is exactly one top-level loop; in this case if + // this function is more than a minimal wrapper around the loop, extract + // the loop. + bool ShouldExtractLoop = false; + + // Extract the loop if the entry block doesn't branch to the loop header. + TerminatorInst *EntryTI = + L->getHeader()->getParent()->getEntryBlock().getTerminator(); + if (!isa<BranchInst>(EntryTI) || + !cast<BranchInst>(EntryTI)->isUnconditional() || + EntryTI->getSuccessor(0) != L->getHeader()) { + ShouldExtractLoop = true; + } else { + // Check to see if any exits from the loop are more than just return + // blocks. 
+    SmallVector<BasicBlock*, 8> ExitBlocks;
+    L->getExitBlocks(ExitBlocks);
+    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+      if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+        ShouldExtractLoop = true;
+        break;
+      }
+  }
+
+  if (ShouldExtractLoop) {
+    // We must omit EH pads: an EH pad must accompany its invoke instruction,
+    // so extracting one here would create a loop in the extracted function,
+    // and an infinite cycle occurs when the extractor tries to extract that
+    // loop as well.
+    SmallVector<BasicBlock*, 8> ExitBlocks;
+    L->getExitBlocks(ExitBlocks);
+    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+      if (ExitBlocks[i]->isEHPad()) {
+        ShouldExtractLoop = false;
+        break;
+      }
+  }
+
+  if (ShouldExtractLoop) {
+    if (NumLoops == 0) return Changed;
+    --NumLoops;
+    CodeExtractor Extractor(DT, *L);
+    if (Extractor.extractCodeRegion() != nullptr) {
+      Changed = true;
+      // After extraction, the loop is replaced by a function call, so
+      // we shouldn't try to run any more loop passes on it.
+      LI.updateUnloop(L);
+      // Count only the loops we actually extracted.
+      ++NumExtracted;
+    }
+  }
+
+  return Changed;
+}
+
+// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+// program into a function if it can. This is used by bugpoint.
+//
+Pass *llvm::createSingleLoopExtractorPass() {
+  return new SingleLoopExtractor();
+}
+
+
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+          cl::desc("A file containing list of basic blocks to not extract"),
+          cl::Hidden);
+
+namespace {
+  /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
+  /// from the module into their own functions except for those specified by
+  /// the BlocksToNotExtract list.
+  class BlockExtractorPass : public ModulePass {
+    void LoadFile(const char *Filename);
+    void SplitLandingPadPreds(Function *F);
+
+    std::vector<BasicBlock*> BlocksToNotExtract;
+    std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    BlockExtractorPass() : ModulePass(ID) {
+      if (!BlockFile.empty())
+        LoadFile(BlockFile.c_str());
+    }
+
+    bool runOnModule(Module &M) override;
+  };
+}
+
+char BlockExtractorPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+                "Extract Basic Blocks From Module (for bugpoint use)",
+                false, false)
+
+// createBlockExtractorPass - This pass extracts all blocks (except those
+// specified in the argument list) from the functions in the module.
+//
+ModulePass *llvm::createBlockExtractorPass() {
+  return new BlockExtractorPass();
+}
+
+void BlockExtractorPass::LoadFile(const char *Filename) {
+  // Load the BlockFile...
+  std::ifstream In(Filename);
+  if (!In.good()) {
+    errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+           << "'!\n";
+    return;
+  }
+  while (In) {
+    std::string FunctionName, BlockName;
+    In >> FunctionName;
+    In >> BlockName;
+    if (!BlockName.empty())
+      BlocksToNotExtractByName.push_back(
+          std::make_pair(FunctionName, BlockName));
+  }
+}
+
+/// SplitLandingPadPreds - The landing pad needs to be extracted with the
+/// invoke instruction. The critical edge breaker will refuse to break
+/// critical edges to a landing pad, so we split them here. After this method
+/// runs, all landing pads should have only one predecessor.
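+///
+/// Illustrative sketch (hypothetical block names): if invokes in %bb1 and
+/// %bb2 both unwind to %lpad, the pad is split into %lpad.1 and %lpad.2
+/// (the ".1"/".2" suffixes passed to SplitLandingPadPredecessors below),
+/// leaving each resulting landing pad with a single predecessor.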
+void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
+  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+    InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator());
+    if (!II) continue;
+    BasicBlock *Parent = II->getParent();
+    BasicBlock *LPad = II->getUnwindDest();
+
+    // Look through the landing pad's predecessors. If one of them ends in an
+    // 'invoke', then we want to split the landing pad.
+    bool Split = false;
+    for (pred_iterator
+           PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) {
+      BasicBlock *BB = *PI;
+      if (BB->isLandingPad() && BB != Parent &&
+          isa<InvokeInst>(Parent->getTerminator())) {
+        Split = true;
+        break;
+      }
+    }
+
+    if (!Split) continue;
+
+    SmallVector<BasicBlock*, 2> NewBBs;
+    SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
+  }
+}
+
+bool BlockExtractorPass::runOnModule(Module &M) {
+  std::set<BasicBlock*> TranslatedBlocksToNotExtract;
+  for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
+    BasicBlock *BB = BlocksToNotExtract[i];
+    Function *F = BB->getParent();
+
+    // Find the corresponding function in this module.
+    Function *MF = M.getFunction(F->getName());
+    assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?");
+
+    // Figure out which index the basic block is in its function.
+    Function::iterator BBI = MF->begin();
+    std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
+    TranslatedBlocksToNotExtract.insert(&*BBI);
+  }
+
+  while (!BlocksToNotExtractByName.empty()) {
+    // There's no way to find BBs by name without looking at every BB inside
+    // every Function. Fortunately, this list is always empty except when used
+    // by bugpoint, in which case correctness is more important than
+    // performance.
+
+    std::string &FuncName  = BlocksToNotExtractByName.back().first;
+    std::string &BlockName = BlocksToNotExtractByName.back().second;
+
+    for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+      Function &F = *FI;
+      if (F.getName() != FuncName) continue;
+
+      for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+        BasicBlock &BB = *BI;
+        if (BB.getName() != BlockName) continue;
+
+        TranslatedBlocksToNotExtract.insert(&*BI);
+      }
+    }
+
+    BlocksToNotExtractByName.pop_back();
+  }
+
+  // Now that we know which blocks to not extract, figure out which ones we
+  // WANT to extract.
+  std::vector<BasicBlock*> BlocksToExtract;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    SplitLandingPadPreds(&*F);
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      if (!TranslatedBlocksToNotExtract.count(&*BB))
+        BlocksToExtract.push_back(&*BB);
+  }
+
+  for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
+    SmallVector<BasicBlock*, 2> BlocksToExtractVec;
+    BlocksToExtractVec.push_back(BlocksToExtract[i]);
+    if (const InvokeInst *II =
+            dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator()))
+      BlocksToExtractVec.push_back(II->getUnwindDest());
+    CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+  }
+
+  return !BlocksToExtract.empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
new file mode 100644
index 0000000..7b51574
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,1055 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic. +// See http://llvm.org/docs/LangRef.html#bitsets for more information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/LowerBitSets.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "lowerbitsets" + +STATISTIC(ByteArraySizeBits, "Byte array size in bits"); +STATISTIC(ByteArraySizeBytes, "Byte array size in bytes"); +STATISTIC(NumByteArraysCreated, "Number of byte arrays created"); +STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered"); +STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets"); + +static cl::opt<bool> AvoidReuse( + "lowerbitsets-avoid-reuse", + cl::desc("Try to avoid reuse of byte array addresses using aliases"), + cl::Hidden, cl::init(true)); + +bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { + if (Offset < ByteOffset) + return false; + + if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0) + return false; + + uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2; + if (BitOffset >= BitSize) + return false; + + return Bits.count(BitOffset); +} + +bool BitSetInfo::containsValue( + const DataLayout &DL, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V, + uint64_t COffset) const { + if (auto GV = dyn_cast<GlobalObject>(V)) { + auto I = GlobalLayout.find(GV); + if (I == GlobalLayout.end()) + return false; + return containsGlobalOffset(I->second + COffset); + } + + if (auto GEP = dyn_cast<GEPOperator>(V)) { + APInt APOffset(DL.getPointerSizeInBits(0), 0); + bool Result = GEP->accumulateConstantOffset(DL, APOffset); + if (!Result) + return false; + COffset += APOffset.getZExtValue(); + return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), + COffset); + } + + if (auto Op = dyn_cast<Operator>(V)) { + if (Op->getOpcode() == Instruction::BitCast) + return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset); + + if (Op->getOpcode() == Instruction::Select) + return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) && + containsValue(DL, GlobalLayout, Op->getOperand(2), COffset); + } + + return false; +} + +void BitSetInfo::print(raw_ostream &OS) const { + OS << "offset " << ByteOffset << " size " << BitSize << " align " + << (1 << AlignLog2); + + if (isAllOnes()) { + OS << " all-ones\n"; + return; + } + + OS << " { "; + for (uint64_t B : Bits) + OS << B << ' '; + OS << "}\n"; +} + +BitSetInfo BitSetBuilder::build() { + if (Min > Max) + Min = 0; + + // Normalize each offset against the minimum observed offset, and compute + // the bitwise OR of each of the offsets. 
The number of trailing zeros + // in the mask gives us the log2 of the alignment of all offsets, which + // allows us to compress the bitset by only storing one bit per aligned + // address. + uint64_t Mask = 0; + for (uint64_t &Offset : Offsets) { + Offset -= Min; + Mask |= Offset; + } + + BitSetInfo BSI; + BSI.ByteOffset = Min; + + BSI.AlignLog2 = 0; + if (Mask != 0) + BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined); + + // Build the compressed bitset while normalizing the offsets against the + // computed alignment. + BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1; + for (uint64_t Offset : Offsets) { + Offset >>= BSI.AlignLog2; + BSI.Bits.insert(Offset); + } + + return BSI; +} + +void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) { + // Create a new fragment to hold the layout for F. + Fragments.emplace_back(); + std::vector<uint64_t> &Fragment = Fragments.back(); + uint64_t FragmentIndex = Fragments.size() - 1; + + for (auto ObjIndex : F) { + uint64_t OldFragmentIndex = FragmentMap[ObjIndex]; + if (OldFragmentIndex == 0) { + // We haven't seen this object index before, so just add it to the current + // fragment. + Fragment.push_back(ObjIndex); + } else { + // This index belongs to an existing fragment. Copy the elements of the + // old fragment into this one and clear the old fragment. We don't update + // the fragment map just yet, this ensures that any further references to + // indices from the old fragment in this fragment do not insert any more + // indices. + std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex]; + Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end()); + OldFragment.clear(); + } + } + + // Update the fragment map to point our object indices to this fragment. + for (uint64_t ObjIndex : Fragment) + FragmentMap[ObjIndex] = FragmentIndex; +} + +void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits, + uint64_t BitSize, uint64_t &AllocByteOffset, + uint8_t &AllocMask) { + // Find the smallest current allocation. + unsigned Bit = 0; + for (unsigned I = 1; I != BitsPerByte; ++I) + if (BitAllocs[I] < BitAllocs[Bit]) + Bit = I; + + AllocByteOffset = BitAllocs[Bit]; + + // Add our size to it. + unsigned ReqSize = AllocByteOffset + BitSize; + BitAllocs[Bit] = ReqSize; + if (Bytes.size() < ReqSize) + Bytes.resize(ReqSize); + + // Set our bits. + AllocMask = 1 << Bit; + for (uint64_t B : Bits) + Bytes[AllocByteOffset + B] |= AllocMask; +} + +namespace { + +struct ByteArrayInfo { + std::set<uint64_t> Bits; + uint64_t BitSize; + GlobalVariable *ByteArray; + Constant *Mask; +}; + +struct LowerBitSets : public ModulePass { + static char ID; + LowerBitSets() : ModulePass(ID) { + initializeLowerBitSetsPass(*PassRegistry::getPassRegistry()); + } + + Module *M; + + bool LinkerSubsectionsViaSymbols; + Triple::ArchType Arch; + Triple::ObjectFormatType ObjectFormat; + IntegerType *Int1Ty; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + Type *Int32PtrTy; + IntegerType *Int64Ty; + IntegerType *IntPtrTy; + + // The llvm.bitsets named metadata. + NamedMDNode *BitSetNM; + + // Mapping from bitset identifiers to the call sites that test them. 
+  DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;
+
+  std::vector<ByteArrayInfo> ByteArrayInfos;
+
+  BitSetInfo
+  buildBitSet(Metadata *BitSet,
+              const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  ByteArrayInfo *createByteArray(BitSetInfo &BSI);
+  void allocateByteArrays();
+  Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+                          Value *BitOffset);
+  void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
+                        Constant *CombinedGlobalAddr,
+                        const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  Value *
+  lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+                  Constant *CombinedGlobal,
+                  const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
+                                       ArrayRef<GlobalVariable *> Globals);
+  unsigned getJumpTableEntrySize();
+  Type *getJumpTableEntryType();
+  Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
+                                 unsigned Distance);
+  void verifyBitSetMDNode(MDNode *Op);
+  void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+                                 ArrayRef<Function *> Functions);
+  void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
+                                   ArrayRef<GlobalObject *> Globals);
+  bool buildBitSets();
+  bool eraseBitSetMetadata();
+
+  bool doInitialization(Module &M) override;
+  bool runOnModule(Module &M) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+                      "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+                    "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &Mod) {
+  M = &Mod;
+  const DataLayout &DL = Mod.getDataLayout();
+
+  Triple TargetTriple(M->getTargetTriple());
+  LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+  Arch = TargetTriple.getArch();
+  ObjectFormat = TargetTriple.getObjectFormat();
+
+  Int1Ty = Type::getInt1Ty(M->getContext());
+  Int8Ty = Type::getInt8Ty(M->getContext());
+  Int32Ty = Type::getInt32Ty(M->getContext());
+  Int32PtrTy = PointerType::getUnqual(Int32Ty);
+  Int64Ty = Type::getInt64Ty(M->getContext());
+  IntPtrTy = DL.getIntPtrType(M->getContext(), 0);
+
+  BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+  BitSetTestCallSites.clear();
+
+  return false;
+}
+
+/// Build a bit set for BitSet using the object layouts in
+/// GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+    Metadata *BitSet,
+    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+  BitSetBuilder BSB;
+
+  // Compute the byte offset of each element of this bitset.
+  if (BitSetNM) {
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+      Constant *OpConst =
+          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
+      if (auto GA = dyn_cast<GlobalAlias>(OpConst))
+        OpConst = GA->getAliasee();
+      auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
+      if (!OpGlobal)
+        continue;
+      uint64_t Offset =
+          cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+                                ->getValue())->getZExtValue();
+
+      Offset += GlobalLayout.find(OpGlobal)->second;
+
+      BSB.addOffset(Offset);
+    }
+  }
+
+  return BSB.build();
+}
+
+/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
+/// Bits. This pattern lowers to the bt instruction on x86.
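+///
+/// Illustrative example (not from the source): with a 32-bit Bits value and
+/// BitOffset = 37, the emitted IR computes BitIndex = 37 & 31 = 5 and tests
+/// (Bits & (1 << 5)) != 0, i.e. the masked bit test built below.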
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, + Value *BitOffset) { + auto BitsType = cast<IntegerType>(Bits->getType()); + unsigned BitWidth = BitsType->getBitWidth(); + + BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType); + Value *BitIndex = + B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1)); + Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex); + Value *MaskedBits = B.CreateAnd(Bits, BitMask); + return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); +} + +ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) { + // Create globals to stand in for byte arrays and masks. These never actually + // get initialized, we RAUW and erase them later in allocateByteArrays() once + // we know the offset and mask to use. + auto ByteArrayGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + auto MaskGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + + ByteArrayInfos.emplace_back(); + ByteArrayInfo *BAI = &ByteArrayInfos.back(); + + BAI->Bits = BSI.Bits; + BAI->BitSize = BSI.BitSize; + BAI->ByteArray = ByteArrayGlobal; + BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty); + return BAI; +} + +void LowerBitSets::allocateByteArrays() { + std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(), + [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { + return BAI1.BitSize > BAI2.BitSize; + }); + + std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size()); + + ByteArrayBuilder BAB; + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + uint8_t Mask; + BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask); + + BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask)); + cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent(); + } + + Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes); + auto ByteArray = + new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true, + GlobalValue::PrivateLinkage, ByteArrayConst); + + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( + ByteArrayConst->getType(), ByteArray, Idxs); + + // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures + // that the pc-relative displacement is folded into the lea instead of the + // test instruction getting another displacement. + if (LinkerSubsectionsViaSymbols) { + BAI->ByteArray->replaceAllUsesWith(GEP); + } else { + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M); + BAI->ByteArray->replaceAllUsesWith(Alias); + } + BAI->ByteArray->eraseFromParent(); + } + + ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] + + BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] + + BAB.BitAllocs[6] + BAB.BitAllocs[7]; + ByteArraySizeBytes = BAB.Bytes.size(); +} + +/// Build a test that bit BitOffset is set in BSI, where +/// BitSetGlobal is a global containing the bits in BSI. +Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, + ByteArrayInfo *&BAI, Value *BitOffset) { + if (BSI.BitSize <= 64) { + // If the bit set is sufficiently small, we can avoid a load by bit testing + // a constant. 
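+    // For instance (hypothetical set), a bitset with BitSize 3 containing
+    // bits {0, 2} becomes the constant 0b101, and the test reduces to a
+    // masked bit test against that immediate.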
+ IntegerType *BitsTy; + if (BSI.BitSize <= 32) + BitsTy = Int32Ty; + else + BitsTy = Int64Ty; + + uint64_t Bits = 0; + for (auto Bit : BSI.Bits) + Bits |= uint64_t(1) << Bit; + Constant *BitsConst = ConstantInt::get(BitsTy, Bits); + return createMaskedBitTest(B, BitsConst, BitOffset); + } else { + if (!BAI) { + ++NumByteArraysCreated; + BAI = createByteArray(BSI); + } + + Constant *ByteArray = BAI->ByteArray; + Type *Ty = BAI->ByteArray->getValueType(); + if (!LinkerSubsectionsViaSymbols && AvoidReuse) { + // Each use of the byte array uses a different alias. This makes the + // backend less likely to reuse previously computed byte array addresses, + // improving the security of the CFI mechanism based on this pass. + ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, + GlobalValue::PrivateLinkage, "bits_use", + ByteArray, M); + } + + Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset); + Value *Byte = B.CreateLoad(ByteAddr); + + Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); + return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0)); + } +} + +/// Lower a llvm.bitset.test call to its implementation. Returns the value to +/// replace the call with. +Value *LowerBitSets::lowerBitSetCall( + CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, + Constant *CombinedGlobalIntAddr, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { + Value *Ptr = CI->getArgOperand(0); + const DataLayout &DL = M->getDataLayout(); + + if (BSI.containsValue(DL, GlobalLayout, Ptr)) + return ConstantInt::getTrue(M->getContext()); + + Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( + CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); + + BasicBlock *InitialBB = CI->getParent(); + + IRBuilder<> B(CI); + + Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); + + if (BSI.isSingleOffset()) + return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt); + + Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt); + + Value *BitOffset; + if (BSI.AlignLog2 == 0) { + BitOffset = PtrOffset; + } else { + // We need to check that the offset both falls within our range and is + // suitably aligned. We can check both properties at the same time by + // performing a right rotate by log2(alignment) followed by an integer + // comparison against the bitset size. The rotate will move the lower + // order bits that need to be zero into the higher order bits of the + // result, causing the comparison to fail if they are nonzero. The rotate + // also conveniently gives us a bit offset to use during the load from + // the bitset. + Value *OffsetSHR = + B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2)); + Value *OffsetSHL = B.CreateShl( + PtrOffset, + ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2)); + BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); + } + + Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize); + Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst); + + // If the bit set is all ones, testing against it is unnecessary. + if (BSI.isAllOnes()) + return OffsetInRange; + + TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false); + IRBuilder<> ThenB(Term); + + // Now that we know that the offset is in range and aligned, load the + // appropriate bit from the bitset. 
+  Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+
+  // The value we want is 0 if we came directly from the initial block
+  // (having failed the range or alignment checks), or the loaded bit if
+  // we came from the block in which we loaded it.
+  B.SetInsertPoint(CI);
+  PHINode *P = B.CreatePHI(Int1Ty, 2);
+  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+  P->addIncoming(Bit, ThenB.GetInsertBlock());
+  return P;
+}
+
+/// Given a disjoint set of bitsets and globals, lay out the globals, build the
+/// bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromGlobalVariables(
+    ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
+  // Build a new global with the combined contents of the referenced globals.
+  // This global is a struct whose even-indexed elements contain the original
+  // contents of the referenced globals and whose odd-indexed elements contain
+  // any padding required to align the next element to the next power of 2.
+  std::vector<Constant *> GlobalInits;
+  const DataLayout &DL = M->getDataLayout();
+  for (GlobalVariable *G : Globals) {
+    GlobalInits.push_back(G->getInitializer());
+    uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());
+
+    // Compute the amount of padding required.
+    uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+    // A cap of 128 was found experimentally to give a good data/instruction
+    // overhead tradeoff.
+    if (Padding > 128)
+      Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+    GlobalInits.push_back(
+        ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+  }
+  if (!GlobalInits.empty())
+    GlobalInits.pop_back();
+  Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
+  auto *CombinedGlobal =
+      new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
+                         GlobalValue::PrivateLinkage, NewInit);
+
+  StructType *NewTy = cast<StructType>(NewInit->getType());
+  const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
+
+  // Compute the offsets of the original globals within the new global.
+  DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+  for (unsigned I = 0; I != Globals.size(); ++I)
+    // Multiply by 2 to account for padding elements.
+    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
+
+  lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);
+
+  // Build aliases pointing to offsets into the combined global for each
+  // global from which we built the combined global, and replace references
+  // to the original globals with references to the aliases.
+  for (unsigned I = 0; I != Globals.size(); ++I) {
+    // Multiply by 2 to account for padding elements.
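+    // (Illustration: the combined struct is {G0, pad0, G1, pad1, ...}, so
+    // Globals[1] lives at struct element 2 and the GEP indices are {0, 2}.)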
+ Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, I * 2)}; + Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( + NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); + if (LinkerSubsectionsViaSymbols) { + Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + assert(Globals[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, + Globals[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Globals[I]->getVisibility()); + GAlias->takeName(Globals[I]); + Globals[I]->replaceAllUsesWith(GAlias); + } + Globals[I]->eraseFromParent(); + } +} + +void LowerBitSets::lowerBitSetCalls( + ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { + Constant *CombinedGlobalIntAddr = + ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy); + + // For each bitset in this disjoint set... + for (Metadata *BS : BitSets) { + // Build the bitset. + BitSetInfo BSI = buildBitSet(BS, GlobalLayout); + DEBUG({ + if (auto BSS = dyn_cast<MDString>(BS)) + dbgs() << BSS->getString() << ": "; + else + dbgs() << "<unnamed>: "; + BSI.print(dbgs()); + }); + + ByteArrayInfo *BAI = nullptr; + + // Lower each call to llvm.bitset.test for this bitset. + for (CallInst *CI : BitSetTestCallSites[BS]) { + ++NumBitSetCallsLowered; + Value *Lowered = + lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout); + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } + } +} + +void LowerBitSets::verifyBitSetMDNode(MDNode *Op) { + if (Op->getNumOperands() != 3) + report_fatal_error( + "All operands of llvm.bitsets metadata must have 3 elements"); + if (!Op->getOperand(1)) + return; + + auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1)); + if (!OpConstMD) + report_fatal_error("Bit set element must be a constant"); + auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue()); + if (!OpGlobal) + return; + + if (OpGlobal->isThreadLocal()) + report_fatal_error("Bit set element may not be thread-local"); + if (OpGlobal->hasSection()) + report_fatal_error("Bit set element may not have an explicit section"); + + if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker()) + report_fatal_error("Bit set global var element must be a definition"); + + auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2)); + if (!OffsetConstMD) + report_fatal_error("Bit set element offset must be a constant"); + auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue()); + if (!OffsetInt) + report_fatal_error("Bit set element offset must be an integer constant"); +} + +static const unsigned kX86JumpTableEntrySize = 8; + +unsigned LowerBitSets::getJumpTableEntrySize() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return kX86JumpTableEntrySize; +} + +// Create a constant representing a jump table entry for the target. This +// consists of an instruction sequence containing a relative branch to Dest. The +// constant will be laid out at address Src+(Len*Distance) where Len is the +// target-specific jump table entry size. 
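+// For the x86 entry emitted below, the layout is 8 bytes: a 5-byte
+// "jmp rel32" followed by three int3 padding bytes, so (illustratively) the
+// displacement stored for entry Distance is Dest - (Src + Distance*8 + 5),
+// the +5 accounting for the jmp instruction's own length.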
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest, + unsigned Distance) { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + const unsigned kJmpPCRel32Code = 0xe9; + const unsigned kInt3Code = 0xcc; + + ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code); + + // Build a constant representing the displacement between the constant's + // address and Dest. This will resolve to a PC32 relocation referring to Dest. + Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy); + Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy); + Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt); + ConstantInt *DispOffset = + ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5); + Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset); + OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty); + + ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code); + + Constant *Fields[] = { + Jmp, OffsetedDisp, Int3, Int3, Int3, + }; + return ConstantStruct::getAnon(Fields, /*Packed=*/true); +} + +Type *LowerBitSets::getJumpTableEntryType() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return StructType::get(M->getContext(), + {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty}, + /*Packed=*/true); +} + +/// Given a disjoint set of bitsets and functions, build a jump table for the +/// functions, build the bit sets and lower the llvm.bitset.test calls. +void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, + ArrayRef<Function *> Functions) { + // Unlike the global bitset builder, the function bitset builder cannot + // re-arrange functions in a particular order and base its calculations on the + // layout of the functions' entry points, as we have no idea how large a + // particular function will end up being (the size could even depend on what + // this pass does!) Instead, we build a jump table, which is a block of code + // consisting of one branch instruction for each of the functions in the bit + // set that branches to the target function, and redirect any taken function + // addresses to the corresponding jump table entry. In the object file's + // symbol table, the symbols for the target functions also refer to the jump + // table entries, so that addresses taken outside the module will pass any + // verification done inside the module. + // + // In more concrete terms, suppose we have three functions f, g, h which are + // members of a single bitset, and a function foo that returns their + // addresses: + // + // f: + // mov 0, %eax + // ret + // + // g: + // mov 1, %eax + // ret + // + // h: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // To create a jump table for these functions, we instruct the LLVM code + // generator to output a jump table in the .text section. This is done by + // representing the instructions in the jump table as an LLVM constant and + // placing them in a global variable in the .text section. 
The end result will + // (conceptually) look like this: + // + // f: + // jmp .Ltmp0 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // g: + // jmp .Ltmp1 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // h: + // jmp .Ltmp2 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // .Ltmp0: + // mov 0, %eax + // ret + // + // .Ltmp1: + // mov 1, %eax + // ret + // + // .Ltmp2: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // Because the addresses of f, g, h are evenly spaced at a power of 2, in the + // normal case the check can be carried out using the same kind of simple + // arithmetic that we normally use for globals. + + assert(!Functions.empty()); + + // Build a simple layout based on the regular layout of jump tables. + DenseMap<GlobalObject *, uint64_t> GlobalLayout; + unsigned EntrySize = getJumpTableEntrySize(); + for (unsigned I = 0; I != Functions.size(); ++I) + GlobalLayout[Functions[I]] = I * EntrySize; + + // Create a constant to hold the jump table. + ArrayType *JumpTableType = + ArrayType::get(getJumpTableEntryType(), Functions.size()); + auto JumpTable = new GlobalVariable(*M, JumpTableType, + /*isConstant=*/true, + GlobalValue::PrivateLinkage, nullptr); + JumpTable->setSection(ObjectFormat == Triple::MachO + ? "__TEXT,__text,regular,pure_instructions" + : ".text"); + lowerBitSetCalls(BitSets, JumpTable, GlobalLayout); + + // Build aliases pointing to offsets into the jump table, and replace + // references to the original functions with references to the aliases. + for (unsigned I = 0; I != Functions.size(); ++I) { + Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( + ConstantExpr::getGetElementPtr( + JumpTableType, JumpTable, + ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, I)}), + Functions[I]->getType()); + if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) { + Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + assert(Functions[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0, + Functions[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Functions[I]->getVisibility()); + GAlias->takeName(Functions[I]); + Functions[I]->replaceAllUsesWith(GAlias); + } + if (!Functions[I]->isDeclarationForLinker()) + Functions[I]->setLinkage(GlobalValue::PrivateLinkage); + } + + // Build and set the jump table's initializer. + std::vector<Constant *> JumpTableEntries; + for (unsigned I = 0; I != Functions.size(); ++I) + JumpTableEntries.push_back( + createJumpTableEntry(JumpTable, Functions[I], I)); + JumpTable->setInitializer( + ConstantArray::get(JumpTableType, JumpTableEntries)); +} + +void LowerBitSets::buildBitSetsFromDisjointSet( + ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) { + llvm::DenseMap<Metadata *, uint64_t> BitSetIndices; + llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices; + for (unsigned I = 0; I != BitSets.size(); ++I) + BitSetIndices[BitSets[I]] = I; + for (unsigned I = 0; I != Globals.size(); ++I) + GlobalIndices[Globals[I]] = I; + + // For each bitset, build a set of indices that refer to globals referenced by + // the bitset. 
+  std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+  if (BitSetNM) {
+    for (MDNode *Op : BitSetNM->operands()) {
+      // Op = { bitset name, global, offset }
+      if (!Op->getOperand(1))
+        continue;
+      auto I = BitSetIndices.find(Op->getOperand(0));
+      if (I == BitSetIndices.end())
+        continue;
+
+      auto OpGlobal = dyn_cast<GlobalObject>(
+          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+      if (!OpGlobal)
+        continue;
+      BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+    }
+  }
+
+  // Order the sets of indices by size. The GlobalLayoutBuilder works best
+  // when given small index sets first.
+  std::stable_sort(
+      BitSetMembers.begin(), BitSetMembers.end(),
+      [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+        return O1.size() < O2.size();
+      });
+
+  // Create a GlobalLayoutBuilder and provide it with index sets as layout
+  // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
+  // as close together as possible.
+  GlobalLayoutBuilder GLB(Globals.size());
+  for (auto &&MemSet : BitSetMembers)
+    GLB.addFragment(MemSet);
+
+  // Build the bitsets from this disjoint set.
+  if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
+    // Build a vector of global variables with the computed layout.
+    std::vector<GlobalVariable *> OrderedGVs(Globals.size());
+    auto OGI = OrderedGVs.begin();
+    for (auto &&F : GLB.Fragments) {
+      for (auto &&Offset : F) {
+        auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
+        if (!GV)
+          report_fatal_error(
+              "Bit set may not contain both global variables and functions");
+        *OGI++ = GV;
+      }
+    }
+
+    buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
+  } else {
+    // Build a vector of functions with the computed layout.
+    std::vector<Function *> OrderedFns(Globals.size());
+    auto OFI = OrderedFns.begin();
+    for (auto &&F : GLB.Fragments) {
+      for (auto &&Offset : F) {
+        auto Fn = dyn_cast<Function>(Globals[Offset]);
+        if (!Fn)
+          report_fatal_error(
+              "Bit set may not contain both global variables and functions");
+        *OFI++ = Fn;
+      }
+    }
+
+    buildBitSetsFromFunctions(BitSets, OrderedFns);
+  }
+}
+
+/// Lower all bit sets in this module.
+bool LowerBitSets::buildBitSets() {
+  Function *BitSetTestFunc =
+      M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+  if (!BitSetTestFunc)
+    return false;
+
+  // Equivalence class set containing bitsets and the globals they reference.
+  // This is used to partition the set of bitsets in the module into disjoint
+  // sets.
+  typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
+      GlobalClassesTy;
+  GlobalClassesTy GlobalClasses;
+
+  // Verify the bitset metadata and build a mapping from bitset identifiers to
+  // their last observed index in BitSetNM. This will be used later to
+  // deterministically order the list of bitset identifiers.
+  llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
+  if (BitSetNM) {
+    for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
+      MDNode *Op = BitSetNM->getOperand(I);
+      verifyBitSetMDNode(Op);
+      BitSetIdIndices[Op->getOperand(0)] = I;
+    }
+  }
+
+  for (const Use &U : BitSetTestFunc->uses()) {
+    auto CI = cast<CallInst>(U.getUser());
+
+    auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+    if (!BitSetMDVal)
+      report_fatal_error(
+          "Second argument of llvm.bitset.test must be metadata");
+    auto BitSet = BitSetMDVal->getMetadata();
+
+    // Add the call site to the list of call sites for this bit set. We also
+    // use BitSetTestCallSites to keep track of whether we have seen this bit
+    // set before.
If we have, we don't need to re-add the referenced globals to the + // equivalence class. + std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, + bool> Ins = + BitSetTestCallSites.insert( + std::make_pair(BitSet, std::vector<CallInst *>())); + Ins.first->second.push_back(CI); + if (!Ins.second) + continue; + + // Add the bitset to the equivalence class. + GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet); + GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI); + + if (!BitSetNM) + continue; + + // Add the referenced globals to the bitset's equivalence class. + for (MDNode *Op : BitSetNM->operands()) { + if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) + continue; + + auto OpGlobal = dyn_cast<GlobalObject>( + cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); + if (!OpGlobal) + continue; + + CurSet = GlobalClasses.unionSets( + CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal))); + } + } + + if (GlobalClasses.empty()) + return false; + + // Build a list of disjoint sets ordered by their maximum BitSetNM index + // for determinism. + std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets; + for (GlobalClassesTy::iterator I = GlobalClasses.begin(), + E = GlobalClasses.end(); + I != E; ++I) { + if (!I->isLeader()) continue; + ++NumBitSetDisjointSets; + + unsigned MaxIndex = 0; + for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); + MI != GlobalClasses.member_end(); ++MI) { + if ((*MI).is<Metadata *>()) + MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]); + } + Sets.emplace_back(I, MaxIndex); + } + std::sort(Sets.begin(), Sets.end(), + [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1, + const std::pair<GlobalClassesTy::iterator, unsigned> &S2) { + return S1.second < S2.second; + }); + + // For each disjoint set we found... + for (const auto &S : Sets) { + // Build the list of bitsets in this disjoint set. + std::vector<Metadata *> BitSets; + std::vector<GlobalObject *> Globals; + for (GlobalClassesTy::member_iterator MI = + GlobalClasses.member_begin(S.first); + MI != GlobalClasses.member_end(); ++MI) { + if ((*MI).is<Metadata *>()) + BitSets.push_back(MI->get<Metadata *>()); + else + Globals.push_back(MI->get<GlobalObject *>()); + } + + // Order bitsets by BitSetNM index for determinism. This ordering is stable + // as there is a one-to-one mapping between metadata and indices. + std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) { + return BitSetIdIndices[M1] < BitSetIdIndices[M2]; + }); + + // Lower the bitsets in this disjoint set. + buildBitSetsFromDisjointSet(BitSets, Globals); + } + + allocateByteArrays(); + + return true; +} + +bool LowerBitSets::eraseBitSetMetadata() { + if (!BitSetNM) + return false; + + M->eraseNamedMetadata(BitSetNM); + return true; +} + +bool LowerBitSets::runOnModule(Module &M) { + bool Changed = buildBitSets(); + Changed |= eraseBitSetMetadata(); + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp new file mode 100644 index 0000000..8a209a1 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -0,0 +1,1880 @@ +//===- MergeFunctions.cpp - Merge identical functions ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergeable and folds them.
+//
+// An order relation is defined on the set of functions. It is implemented by
+// a function comparison procedure that returns
+// 0 when the functions are equal,
+// -1 when the left function is less than the right function, and
+// 1 in the opposite case. We need a total ordering, so the relation must
+// satisfy four properties on the function set:
+// a <= a (reflexivity)
+// if a <= b and b <= a then a = b (antisymmetry)
+// if a <= b and b <= c then a <= c (transitivity)
+// for all a and b: a <= b or b <= a (totality)
+//
+// The comparison iterates through each instruction in each basic block.
+// Functions are kept in a binary tree. For each new function F we perform a
+// lookup in that tree.
+// In practice it works the following way:
+// -- We define a Function* container class with a custom "operator<"
+//    (FunctionPtr).
+// -- "FunctionPtr" instances are stored in a std::set collection, so every
+//    std::set::insert operation gives a result in O(log N) time.
+//
+// As an optimization, a hash of the function structure is calculated first,
+// and two functions are only compared if they have the same hash. This hash
+// is cheap to compute, and has the property that if function F == G according
+// to the comparison function, then hash(F) == hash(G). This consistency
+// property is critical to ensuring all possible merging opportunities are
+// exploited. Collisions in the hash affect the speed of the pass but not the
+// correctness or determinism of the resulting transformation.
+//
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such functions.
+//
+// * be smarter about bitcasts.
+//
+// In order to fold functions, we will sometimes add either bitcast
+// instructions or bitcast constant expressions. Unfortunately, this can
+// confound further analysis since the two functions differ where one has a
+// bitcast and the other doesn't. We should learn to look through bitcasts.
+//
+// * Compare complex types with pointer types inside.
+// * Compare cross-reference cases.
+// * Compare complex expressions.
+//
+// All three issues above can be described as the ability to prove that
+// fA == fB == fC == fE == fF == fG in the example below:
+//
+// void fA() {
+//   fB();
+// }
+// void fB() {
+//   fA();
+// }
+//
+// void fE() {
+//   fF();
+// }
+// void fF() {
+//   fG();
+// }
+// void fG() {
+//   fE();
+// }
+//
+// The simplest cross-reference case (fA <--> fB) was implemented in previous
+// versions of MergeFunctions, though it appeared in only two function pairs
+// in the test-suite (which contains >50k functions).
+// The ability to detect complex cross-referencing (e.g.: A->B->C->D->A) would
+// cover many more cases.
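+//
+// As a minimal illustrative sketch (hypothetical functions, not from the
+// test-suite): since all pointer types are treated as equivalent, the pass
+// would fold the following pair, redirecting calls to one into a call to a
+// bitcast of the other:
+//
+// int fP(int *p) {
+//   return p != 0;
+// }
+// int fQ(float *q) {
+//   return q != 0;
+// }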
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mergefunc"
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+static cl::opt<unsigned> NumFunctionsForSanityCheck(
+    "mergefunc-sanity",
+    cl::desc("How many functions in the module could be used for "
+             "the MergeFunctions pass sanity check. "
+             "'0' disables this check. Works only with the '-debug' key."),
+    cl::init(0), cl::Hidden);
+
+namespace {
+
+/// GlobalNumberState assigns an integer to each global value in the program,
+/// which is used by the comparison routine to order references to globals.
+/// This state must be preserved throughout the pass, because Functions and
+/// other globals need to maintain their relative order. Globals are assigned
+/// a number when they are first visited. This order is deterministic, and so
+/// the assigned numbers are as well. When two functions are merged, neither
+/// number is updated. If the symbols are weak, this would be incorrect. If
+/// they are strong, then one will replace the other at all of its references,
+/// so direct call sites will see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+  struct Config : ValueMapConfig<GlobalValue*> {
+    enum { FollowRAUW = false };
+  };
+  // Each GlobalValue is mapped to an identifier. The Config ensures that when
+  // RAUW occurs, the mapping does not change. Tracking changes is
+  // unnecessary, and also problematic for weak symbols (which may be
+  // overwritten).
+  typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+  ValueNumberMap GlobalNumbers;
+  // The next unused serial number to assign to a global.
+  uint64_t NextNumber;
+  public:
+  GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
+  uint64_t getNumber(GlobalValue* Global) {
+    ValueNumberMap::iterator MapIter;
+    bool Inserted;
+    std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+    if (Inserted)
+      NextNumber++;
+    return MapIter->second;
+  }
+  void clear() {
+    GlobalNumbers.clear();
+  }
+};
+
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. DataLayout is
+/// used if available. The comparator always fails conservatively (erring on
+/// the side of claiming that two functions are different).
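+///
+/// A minimal usage sketch (mirroring how this pass drives the comparator;
+/// the GlobalNumberState must outlive the comparison):
+///
+///   GlobalNumberState GN;
+///   FunctionComparator FCmp(F1, F2, &GN);
+///   if (FCmp.compare() == 0)
+///     ; // F1 and F2 are equivalent and are candidates for folding.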
+class FunctionComparator {
+public:
+  FunctionComparator(const Function *F1, const Function *F2,
+                     GlobalNumberState* GN)
+      : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
+
+  /// Test whether the two functions have equivalent behaviour.
+  int compare();
+  /// Hash a function. Equivalent functions will have the same hash, and
+  /// unequal functions will have different hashes with high probability.
+  typedef uint64_t FunctionHash;
+  static FunctionHash functionHash(Function &);
+
+private:
+  /// Test whether two basic blocks have equivalent behaviour.
+  int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);
+
+  /// Constants comparison.
+  /// It is analogous to lexicographical comparison between hypothetical
+  /// numbers of the following format:
+  /// <bitcastability-trait><raw-bit-contents>
+  ///
+  /// 1. Bitcastability.
+  /// Check whether L's type can be losslessly bitcasted to R's type.
+  /// At this stage, if a lossless bitcast is not possible, the method
+  /// returns -1 or 1, thus also defining which type is greater in the
+  /// context of bitcastability.
+  /// Stage 0: If the types are equal in terms of cmpTypes, then we can go
+  ///          straight to the contents comparison.
+  ///          If the types differ, remember the types comparison result and
+  ///          check whether we can still bitcast the types.
+  /// Stage 1: Types that satisfy the isFirstClassType condition are always
+  ///          greater than others.
+  /// Stage 2: A vector is greater than a non-vector.
+  ///          If both types are vectors, the vector with the greater
+  ///          bitwidth is greater.
+  ///          If both types are vectors with the same bitwidth, then the
+  ///          types are bitcastable, and we can skip the other stages and go
+  ///          to the contents comparison.
+  /// Stage 3: Pointer types are greater than non-pointers. If both types are
+  ///          pointers of the same address space - go to the contents
+  ///          comparison. Different address spaces: the pointer with the
+  ///          greater address space is greater.
+  /// Stage 4: The types are neither vectors nor pointers, and they differ.
+  ///          We don't know how to bitcast them, so we'd better not do it,
+  ///          and return the types comparison result (so it determines the
+  ///          relationship among constants we don't know how to bitcast).
+  ///
+  /// Just for clarity, let's see how the set of constants could look
+  /// on a single dimension axis:
+  ///
+  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+  /// Where: NFCT - Not a FirstClassType
+  ///        FCT - FirstClassType
+  ///
+  /// 2. Compare raw contents.
+  /// This ignores the types and only compares bits from L and R.
+  /// Returns 0 if L and R have equivalent contents,
+  /// -1 or 1 if the values are different.
+  /// Pretty trivial:
+  /// 2.1. If the contents are numbers, compare the numbers.
+  ///      Ints with greater bitwidth are greater. Ints with the same bitwidth
+  ///      are compared by their contents.
+  /// 2.2. "And so on". To avoid discrepancies with the comments, it is
+  ///      perhaps better to read the implementation itself.
+  /// 3. And again about the overall picture.
+  /// Let's look back at what the ordered set of constants will look like:
+  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+  ///
+  /// Now look at what could be inside [FCT, "others"], for example:
+  /// [FCT, "others"] =
+  /// [
+  ///   [double 0.1], [double 1.23],
+  ///   [i32 1], [i32 2],
+  ///   { double 1.0 },       ; StructTyID, NumElements = 1
+  ///   { i32 1 },            ; StructTyID, NumElements = 1
+  ///   { double 1, i32 1 },  ; StructTyID, NumElements = 2
+  ///   { i32 1, double 1 }   ; StructTyID, NumElements = 2
+  /// ]
+  ///
+  /// Let's explain the order. Float numbers will be less than integers,
+  /// simply because of cmpTypes terms: FloatTyID < IntegerTyID.
+  /// Floats (with the same fltSemantics) are sorted according to their value.
+  /// Then come the integers, which, like the floats, can easily be sorted
+  /// among each other.
+  /// The structures are grouped at the tail, again because of their
+  /// TypeID: StructTyID > IntegerTyID > FloatTyID.
+  /// Structures with a greater number of elements are greater. Of structures
+  /// with the same number of elements, the one whose leading elements are
+  /// greater is greater.
+  /// The same logic applies to vectors, arrays and other possible complex
+  /// types.
+  ///
+  /// Bitcastable constants.
+  /// Let's assume that some constant belongs to some group of
+  /// "so-called-equal" values with different types, and at the same time
+  /// belongs to another group of constants with equal types
+  /// and "really" equal values.
+  ///
+  /// Now, prove that this is impossible:
+  ///
+  /// If constant A with type TyA is bitcastable to B with type TyB, then:
+  /// 1. All constants with types equal to TyA are bitcastable to B. Since
+  ///    those should be vectors (if TyA is a vector), pointers
+  ///    (if TyA is a pointer), or else (if TyA equals TyB), those types
+  ///    should be equal to TyB.
+  /// 2. All constants with non-equal, but bitcastable, types to TyA are
+  ///    bitcastable to B.
+  ///    Once again, simply because we allow it for vectors and pointers only.
+  ///    This statement can be expanded as below:
+  /// 2.1. All vectors with a bitwidth equal to vector A's have a bitwidth
+  ///      equal to vector B's, and are thus bitcastable to B as well.
+  /// 2.2. All pointers of the same address space, no matter what they point
+  ///      to, are bitcastable. So if C is a pointer, it can be bitcasted to A
+  ///      and to B.
+  /// So any constant equal or bitcastable to A is equal or bitcastable to B.
+  /// QED.
+  ///
+  /// In other words, for pointers and vectors we ignore the top-level type
+  /// and look at their particular properties (bit-width for vectors, and
+  /// address space for pointers).
+  /// If these properties are equal - compare their contents.
+  int cmpConstants(const Constant *L, const Constant *R);
+
+  /// Compares two global values by number. Uses the GlobalNumberState to
+  /// identify the same globals across function calls.
+  int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+
+  /// Assign or look up previously assigned numbers for the two values, and
+  /// return the comparison of the assigned numbers. Numbers are assigned in
+  /// the order visited.
+  /// Comparison order:
+  /// Stage 0: A value that is the function itself is always greater than
+  ///          others. If the left and right values are references to their
+  ///          respective functions, then they are equal.
+  /// Stage 1: Constants are greater than non-constants.
+  ///          If both left and right are constants, then the result of
+  ///          cmpConstants is used as the cmpValues result.
+  /// Stage 2: InlineAsm instances are greater than others.
+  ///          If both left and right values are InlineAsm instances, the
+  ///          InlineAsm* pointers are cast to integers and compared as
+  ///          numbers.
+  /// Stage 3: In all other cases we compare the order in which we met these
+  ///          values in their functions. If the right value was met first
+  ///          during scanning, then the left value is greater.
+  ///          In other words, we compare serial numbers; for more details
+  ///          see the comments for sn_mapL and sn_mapR.
+  int cmpValues(const Value *L, const Value *R);
+
+  /// Compare two Instructions for equivalence, similar to
+  /// Instruction::isSameOperationAs but with modifications to the type
+  /// comparison.
+  /// Stages are listed in "most significant stage first" order:
+  /// At each stage below, we compare some left and right operation parts.
+  /// If the parts are non-equal, we assign the parts' comparison result to
+  /// the operation comparison result and exit from the method.
+  /// Otherwise we proceed to the next stage.
+  /// Stages:
+  /// 1. Operation opcodes. Compared as numbers.
+  /// 2. Number of operands.
+  /// 3. Operation types. Compared with the cmpTypes method.
+  /// 4. Compare operation subclass optional data as a stream of bytes:
+  ///    just convert it to integers and call cmpNumbers.
+  /// 5. Compare the operand types of the operation with cmpTypes, in
+  ///    most-significant-operand-first order.
+  /// 6. Last stage. Check operations for some specific attributes.
+  ///    For example, for Load it would be:
+  ///    6.1.Load: volatile (as boolean flag)
+  ///    6.2.Load: alignment (as integer numbers)
+  ///    6.3.Load: synch-scope (as integer numbers)
+  ///    6.4.Load: range metadata (as integer numbers)
+  ///    At this stage it is better to read the code, since it is no more
+  ///    than 10-15 lines per particular instruction and may change from time
+  ///    to time.
+  int cmpOperations(const Instruction *L, const Instruction *R) const;
+
+  /// Compare two GEPs for equivalent pointer arithmetic.
+  /// Parts to be compared for each comparison stage,
+  /// most significant stage first:
+  /// 1. Address space. As numbers.
+  /// 2. Constant offset (using the GEPOperator::accumulateConstantOffset
+  ///    method).
+  /// 3. Pointer operand type (using the cmpTypes method).
+  /// 4. Number of operands.
+  /// 5. Compare operands, using the cmpValues method.
+  int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR);
+  int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+    return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+  }
+
+  /// cmpTypes - compares two types,
+  /// defines a total ordering on the set of types.
+  ///
+  /// Return values:
+  /// 0 if the types are equal,
+  /// -1 if Left is less than Right,
+  /// +1 if Left is greater than Right.
+  ///
+  /// Description:
+  /// Comparison is broken into stages. As in a lexicographical comparison,
+  /// the stage coming first has higher priority.
+  /// At each stage of the explanation, keep the total ordering properties in
+  /// mind.
+  ///
+  /// 0. Before comparison we coerce pointer types of address space 0 to
+  ///    integer.
+  ///    We also don't bother with the same type on the left and right, and
+  ///    just return 0 in this case.
+  ///
+  /// 1. If the types are of different kinds (different type IDs),
+  ///    return the result of the type ID comparison, treating the IDs as
+  ///    numbers.
+  /// 2. If the types are integers, check that they have the same width. If
+  ///    they are vectors, check that they have the same count and subtype.
+  /// 3. The types have the same ID, so check whether they are one of:
+  ///    * Void
+  ///    * Float
+  ///    * Double
+  ///    * X86_FP80
+  ///    * FP128
+  ///    * PPC_FP128
+  ///    * Label
+  ///    * Metadata
+  ///    We can treat these types as equal whenever their IDs are the same.
+  /// 4. If Left and Right are pointers, return the result of the address
+  ///    space comparison (numbers comparison). We can treat pointer types of
+  ///    the same address space as equal.
+  /// 5. If the types are complex,
+  ///    then both Left and Right are expanded and their element types are
+  ///    checked in the same way. If we get Res != 0 at some stage, return it.
+  ///    Otherwise return 0.
+  /// 6. For all other cases put llvm_unreachable.
+  int cmpTypes(Type *TyL, Type *TyR) const;
+
+  int cmpNumbers(uint64_t L, uint64_t R) const;
+  int cmpAPInts(const APInt &L, const APInt &R) const;
+  int cmpAPFloats(const APFloat &L, const APFloat &R) const;
+  int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
+  int cmpMem(StringRef L, StringRef R) const;
+  int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+  int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
+  int cmpOperandBundlesSchema(const Instruction *L,
+                              const Instruction *R) const;
+
+  // The two functions undergoing comparison.
+  const Function *FnL, *FnR;
+
+  /// Assign serial numbers to values from the left function, and values from
+  /// the right function.
+  /// Explanation:
+  /// When comparing functions, we need to compare the values we meet on the
+  /// left and right sides.
+  /// It is easy to sort things out for external values. They just have to be
+  /// the same value on the left and right.
+  /// But for local values (those introduced inside the function body) we have
+  /// to ensure they were introduced at exactly the same place, and play the
+  /// same role.
+  /// So let's assign a serial number to each value when we meet it for the
+  /// first time. Values met at the same place will get the same serial
+  /// numbers.
+  /// At this point it is worth explaining a few points about the values
+  /// assigned to BBs and other implementation details (see below).
+  ///
+  /// 1. Safety of BB reordering.
+  /// It's safe to change the order of BasicBlocks in a function.
+  /// The relationship with other functions and the serial numbering will not
+  /// change in this case.
+  /// As follows from FunctionComparator::compare(), we do a CFG walk: we
+  /// start from the entry, and then take each terminator. So it doesn't
+  /// matter how the BBs are in fact ordered in the function. And since
+  /// cmpValues is called during this walk, the numbering depends only on how
+  /// the BBs are located inside the CFG.
+  /// So the answer is - yes. We will get the same numbering.
+  ///
+  /// 2. Impossibility to use dominance properties of values.
+  /// If we compare two instruction operands: the first is the usage of local
+  /// variable AL from function FL, and the second is the usage of local
+  /// variable AR from FR, we could compare their origins and check whether
+  /// they are defined at the same place.
+  /// But we are still not able to compare operands of PHI nodes, since those
+  /// could be operands from further BBs we haven't scanned yet.
+  /// So it's impossible to use dominance properties in general.
+  DenseMap<const Value*, int> sn_mapL, sn_mapR;
+
+  // The global state we will use.
+  GlobalNumberState* GlobalNumbers;
+};
+
+class FunctionNode {
+  mutable AssertingVH<Function> F;
+  FunctionComparator::FunctionHash Hash;
+public:
+  // Note the hash is recalculated potentially multiple times, but it is
+  // cheap.
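+  // (The FnTree defined later in this file orders FunctionNodes by this hash
+  // first, and falls back to a full FunctionComparator::compare() only when
+  // two hashes collide; see FunctionNodeCmp below.)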
+ FunctionNode(Function *F) + : F(F), Hash(FunctionComparator::functionHash(*F)) {} + Function *getFunc() const { return F; } + FunctionComparator::FunctionHash getHash() const { return Hash; } + + /// Replace the reference to the function F by the function G, assuming their + /// implementations are equal. + void replaceBy(Function *G) const { + F = G; + } + + void release() { F = nullptr; } +}; +} // end anonymous namespace + +int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { + if (L < R) return -1; + if (L > R) return 1; + return 0; +} + +int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { + if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) + return Res; + if (L.ugt(R)) return 1; + if (R.ugt(L)) return -1; + return 0; +} + +int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { + // Floats are ordered first by semantics (i.e. float, double, half, etc.), + // then by value interpreted as a bitstring (aka APInt). + const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics(); + if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL), + APFloat::semanticsPrecision(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL), + APFloat::semanticsMaxExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL), + APFloat::semanticsMinExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL), + APFloat::semanticsSizeInBits(SR))) + return Res; + return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt()); +} + +int FunctionComparator::cmpMem(StringRef L, StringRef R) const { + // Prevent heavy comparison, compare sizes first. + if (int Res = cmpNumbers(L.size(), R.size())) + return Res; + + // Compare strings lexicographically only when it is necessary: only when + // strings are equal in size. + return L.compare(R); +} + +int FunctionComparator::cmpAttrs(const AttributeSet L, + const AttributeSet R) const { + if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) + return Res; + + for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { + AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), + RE = R.end(i); + for (; LI != LE && RI != RE; ++LI, ++RI) { + Attribute LA = *LI; + Attribute RA = *RI; + if (LA < RA) + return -1; + if (RA < LA) + return 1; + } + if (LI != LE) + return 1; + if (RI != RE) + return -1; + } + return 0; +} + +int FunctionComparator::cmpRangeMetadata(const MDNode* L, + const MDNode* R) const { + if (L == R) + return 0; + if (!L) + return -1; + if (!R) + return 1; + // Range metadata is a sequence of numbers. Make sure they are the same + // sequence. + // TODO: Note that as this is metadata, it is possible to drop and/or merge + // this data when considering functions to merge. Thus this comparison would + // return 0 (i.e. equivalent), but merging would become more complicated + // because the ranges would need to be unioned. It is not likely that + // functions differ ONLY in this metadata if they are actually the same + // function semantically. 
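+  // For example (hypothetical metadata), !range !{i32 0, i32 10} and
+  // !range !{i32 0, i32 12} have equal operand counts but differ in the
+  // second bound, so the loop below reports a nonzero result.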
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+  for (size_t I = 0; I < L->getNumOperands(); ++I) {
+    ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+    ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+      return Res;
+  }
+  return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+                                                const Instruction *R) const {
+  ImmutableCallSite LCS(L);
+  ImmutableCallSite RCS(R);
+
+  assert(LCS && RCS && "Must be calls or invokes!");
+  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+  if (int Res =
+          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+    return Res;
+
+  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+    auto OBL = LCS.getOperandBundleAt(i);
+    auto OBR = RCS.getOperandBundleAt(i);
+
+    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+      return Res;
+
+    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+      return Res;
+  }
+
+  return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of the L constant can be losslessly bitcasted
+///    to the R type.
+/// 2. Compare constant contents.
+/// For more details see the declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
+
+  Type *TyL = L->getType();
+  Type *TyR = R->getType();
+
+  // Check whether the types are bitcastable. This part is essentially a
+  // re-factored Type::canLosslesslyBitCastTo method, but instead of returning
+  // true/false, it also encodes into the result which type is "less" for us.
+  int TypesRes = cmpTypes(TyL, TyR);
+  if (TypesRes != 0) {
+    // Types are different, but check whether we can bitcast them.
+    if (!TyL->isFirstClassType()) {
+      if (TyR->isFirstClassType())
+        return -1;
+      // Neither TyL nor TyR are values of first class type. Return the
+      // result of comparing the types.
+      return TypesRes;
+    }
+    if (!TyR->isFirstClassType()) {
+      if (TyL->isFirstClassType())
+        return 1;
+      return TypesRes;
+    }
+
+    // Vector -> Vector conversions are always lossless if the two vector
+    // types have the same size, otherwise not.
+    unsigned TyLWidth = 0;
+    unsigned TyRWidth = 0;
+
+    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
+      TyLWidth = VecTyL->getBitWidth();
+    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
+      TyRWidth = VecTyR->getBitWidth();
+
+    if (TyLWidth != TyRWidth)
+      return cmpNumbers(TyLWidth, TyRWidth);
+
+    // Zero bit-width means neither TyL nor TyR are vectors.
+    if (!TyLWidth) {
+      PointerType *PTyL = dyn_cast<PointerType>(TyL);
+      PointerType *PTyR = dyn_cast<PointerType>(TyR);
+      if (PTyL && PTyR) {
+        unsigned AddrSpaceL = PTyL->getAddressSpace();
+        unsigned AddrSpaceR = PTyR->getAddressSpace();
+        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+          return Res;
+      }
+      if (PTyL)
+        return 1;
+      if (PTyR)
+        return -1;
+
+      // TyL and TyR aren't vectors, nor pointers. We don't know how to
+      // bitcast them.
+      return TypesRes;
+    }
+  }
+
+  // OK, types are bitcastable, now check constant contents.
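+  // (For instance, i8* null and i32* null in address space 0 reach this
+  // point with TypesRes == 0, because cmpTypes coerces both pointer types to
+  // the integer pointer type; the null-value checks below then report
+  // equality.)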
+
+  if (L->isNullValue() && R->isNullValue())
+    return TypesRes;
+  if (L->isNullValue() && !R->isNullValue())
+    return 1;
+  if (!L->isNullValue() && R->isNullValue())
+    return -1;
+
+  auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
+  auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+  if (GlobalValueL && GlobalValueR) {
+    return cmpGlobalValues(GlobalValueL, GlobalValueR);
+  }
+
+  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+    return Res;
+
+  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+    const auto *SeqR = cast<ConstantDataSequential>(R);
+    // This handles ConstantDataArray and ConstantDataVector. Note that we
+    // compare the two raw data arrays, which might differ depending on the
+    // host endianness. This isn't a problem though, because the endianness
+    // of a module will affect the order of the constants, but this order is
+    // the same for a given input module and host platform.
+    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+  }
+
+  switch (L->getValueID()) {
+  case Value::UndefValueVal:
+  case Value::ConstantTokenNoneVal:
+    return TypesRes;
+  case Value::ConstantIntVal: {
+    const APInt &LInt = cast<ConstantInt>(L)->getValue();
+    const APInt &RInt = cast<ConstantInt>(R)->getValue();
+    return cmpAPInts(LInt, RInt);
+  }
+  case Value::ConstantFPVal: {
+    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+    return cmpAPFloats(LAPF, RAPF);
+  }
+  case Value::ConstantArrayVal: {
+    const ConstantArray *LA = cast<ConstantArray>(L);
+    const ConstantArray *RA = cast<ConstantArray>(R);
+    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (uint64_t i = 0; i < NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+                                 cast<Constant>(RA->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantStructVal: {
+    const ConstantStruct *LS = cast<ConstantStruct>(L);
+    const ConstantStruct *RS = cast<ConstantStruct>(R);
+    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (unsigned i = 0; i != NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+                                 cast<Constant>(RS->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantVectorVal: {
+    const ConstantVector *LV = cast<ConstantVector>(L);
+    const ConstantVector *RV = cast<ConstantVector>(R);
+    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
+    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (uint64_t i = 0; i < NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+                                 cast<Constant>(RV->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantExprVal: {
+    const ConstantExpr *LE = cast<ConstantExpr>(L);
+    const ConstantExpr *RE = cast<ConstantExpr>(R);
+    unsigned NumOperandsL = LE->getNumOperands();
+    unsigned NumOperandsR = RE->getNumOperands();
+    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+      return Res;
+    for (unsigned i = 0; i < NumOperandsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+                                 cast<Constant>(RE->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::BlockAddressVal: {
+    const BlockAddress *LBA = cast<BlockAddress>(L);
+    const BlockAddress *RBA = cast<BlockAddress>(R);
+    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+      return Res;
+    if (LBA->getFunction() == RBA->getFunction()) {
+      // They are BBs in the same function. Order by which comes first in the
+      // BB order of the function. This order is deterministic.
+      Function* F = LBA->getFunction();
+      BasicBlock *LBB = LBA->getBasicBlock();
+      BasicBlock *RBB = RBA->getBasicBlock();
+      if (LBB == RBB)
+        return 0;
+      for(BasicBlock &BB : F->getBasicBlockList()) {
+        if (&BB == LBB) {
+          assert(&BB != RBB);
+          return -1;
+        }
+        if (&BB == RBB)
+          return 1;
+      }
+      llvm_unreachable("Basic Block Address does not point to a basic block "
+                       "in its function.");
+      return -1;
+    } else {
+      // cmpValues said the functions are the same. So because they aren't
+      // literally the same pointer, they must respectively be the left and
+      // right functions.
+      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+      // cmpValues will tell us if these are equivalent BasicBlocks, in the
+      // context of their respective functions.
+      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+    }
+  }
+  default: // Unknown constant, abort.
+    DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+    llvm_unreachable("Constant ValueID not recognized.");
+    return -1;
+  }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+  return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
+}
+
+/// cmpTypes - compares two types,
+/// defines a total ordering on the set of types.
+/// See the method declaration comments for more details.
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
+  PointerType *PTyL = dyn_cast<PointerType>(TyL);
+  PointerType *PTyR = dyn_cast<PointerType>(TyR);
+
+  const DataLayout &DL = FnL->getParent()->getDataLayout();
+  if (PTyL && PTyL->getAddressSpace() == 0)
+    TyL = DL.getIntPtrType(TyL);
+  if (PTyR && PTyR->getAddressSpace() == 0)
+    TyR = DL.getIntPtrType(TyR);
+
+  if (TyL == TyR)
+    return 0;
+
+  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
+    return Res;
+
+  switch (TyL->getTypeID()) {
+  default:
+    llvm_unreachable("Unknown type!");
+    // Fall through in Release mode.
+  case Type::IntegerTyID:
+    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+                      cast<IntegerType>(TyR)->getBitWidth());
+  case Type::VectorTyID: {
+    VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR);
+    if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements()))
+      return Res;
+    return cmpTypes(VTyL->getElementType(), VTyR->getElementType());
+  }
+  // If TyL and TyR were the same type, the TyL == TyR check above would
+  // already have returned 0, because types are uniqued.
+  case Type::VoidTyID:
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+  case Type::MetadataTyID:
+  case Type::TokenTyID:
+    return 0;
+
+  case Type::PointerTyID: {
+    assert(PTyL && PTyR && "Both types must be pointers here.");
+    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
+  }
+
+  case Type::StructTyID: {
+    StructType *STyL = cast<StructType>(TyL);
+    StructType *STyR = cast<StructType>(TyR);
+    if (STyL->getNumElements() != STyR->getNumElements())
+      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+
+    if (STyL->isPacked() != STyR->isPacked())
+      return cmpNumbers(STyL->isPacked(), STyR->isPacked());
+
+    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
+      if (int Res = cmpTypes(STyL->getElementType(i),
+                             STyR->getElementType(i)))
+        return Res;
+    }
+    return 0;
+  }
+
+  case Type::FunctionTyID: {
+    FunctionType *FTyL = cast<FunctionType>(TyL);
+    FunctionType *FTyR = cast<FunctionType>(TyR);
+    if (FTyL->getNumParams() != FTyR->getNumParams())
+      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
+
+    if (FTyL->isVarArg() != FTyR->isVarArg())
+      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
+
+    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
+      return Res;
+
+    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
+      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
+        return Res;
+    }
+    return 0;
+  }
+
+  case Type::ArrayTyID: {
+    ArrayType *ATyL = cast<ArrayType>(TyL);
+    ArrayType *ATyR = cast<ArrayType>(TyR);
+    if (ATyL->getNumElements() != ATyR->getNumElements())
+      return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
+    return cmpTypes(ATyL->getElementType(), ATyR->getElementType());
+  }
+  }
+}
+
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+// Read the method declaration comments for more details.
+int FunctionComparator::cmpOperations(const Instruction *L,
+                                      const Instruction *R) const {
+  // Differences from Instruction::isSameOperationAs:
+  // * replace the type comparison with calls to cmpTypes.
+  // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top.
+  // * because of the above, we don't test for the tail bit on calls later on.
+  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+    return Res;
+
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+
+  if (int Res = cmpTypes(L->getType(), R->getType()))
+    return Res;
+
+  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+                           R->getRawSubclassOptionalData()))
+    return Res;
+
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+    if (int Res = cmpTypes(AI->getAllocatedType(),
+                           cast<AllocaInst>(R)->getAllocatedType()))
+      return Res;
+    if (int Res =
+            cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()))
+      return Res;
+  }
+
+  // We have two instructions of identical opcode and #operands. Check to see
+  // if all operands are the same type.
+  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+    if (int Res =
+            cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+      return Res;
+  }
+
+  // Check special state that is a part of some instructions.
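+  // For example (hypothetical IR), 'load atomic i32, i32* %p acquire,
+  // align 4' and 'load i32, i32* %p, align 4' agree on opcode, types and
+  // operands, and are distinguished only by the ordering check below.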
+ if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { + if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) + return Res; + if (int Res = + cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) + return Res; + return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), + cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { + if (int Res = + cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) + return Res; + return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); + } + if (const CmpInst *CI = dyn_cast<CmpInst>(L)) + return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); + if (const CallInst *CI = dyn_cast<CallInst>(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast<CallInst>(R)->getCallingConv())) + return Res; + if (int Res = + cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes())) + return Res; + if (int Res = cmpOperandBundlesSchema(CI, R)) + return Res; + return cmpRangeMetadata( + CI->getMetadata(LLVMContext::MD_range), + cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) { + if (int Res = cmpNumbers(II->getCallingConv(), + cast<InvokeInst>(R)->getCallingConv())) + return Res; + if (int Res = + cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes())) + return Res; + if (int Res = cmpOperandBundlesSchema(II, R)) + return Res; + return cmpRangeMetadata( + II->getMetadata(LLVMContext::MD_range), + cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { + ArrayRef<unsigned> LIndices = IVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { + ArrayRef<unsigned> LIndices = EVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { + if (int Res = + cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) + return Res; + return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); + } + + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { + if (int Res = cmpNumbers(CXI->isVolatile(), + cast<AtomicCmpXchgInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(CXI->isWeak(), + cast<AtomicCmpXchgInst>(R)->isWeak())) + return Res; + if (int Res = cmpNumbers(CXI->getSuccessOrdering(), + cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) + return Res; + if (int Res = cmpNumbers(CXI->getFailureOrdering(), + 
cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) + return Res; + return cmpNumbers(CXI->getSynchScope(), + cast<AtomicCmpXchgInst>(R)->getSynchScope()); + } + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { + if (int Res = cmpNumbers(RMWI->getOperation(), + cast<AtomicRMWInst>(R)->getOperation())) + return Res; + if (int Res = cmpNumbers(RMWI->isVolatile(), + cast<AtomicRMWInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(RMWI->getOrdering(), + cast<AtomicRMWInst>(R)->getOrdering())) + return Res; + return cmpNumbers(RMWI->getSynchScope(), + cast<AtomicRMWInst>(R)->getSynchScope()); + } + return 0; +} + +// Determine whether two GEP operations perform the same underlying arithmetic. +// Read method declaration comments for more details. +int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, + const GEPOperator *GEPR) { + + unsigned int ASL = GEPL->getPointerAddressSpace(); + unsigned int ASR = GEPR->getPointerAddressSpace(); + + if (int Res = cmpNumbers(ASL, ASR)) + return Res; + + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. + const DataLayout &DL = FnL->getParent()->getDataLayout(); + unsigned BitWidth = DL.getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(DL, OffsetL) && + GEPR->accumulateConstantOffset(DL, OffsetR)) + return cmpAPInts(OffsetL, OffsetR); + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) + return Res; + + if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) + return Res; + + for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { + if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) + return Res; + } + + return 0; +} + +int FunctionComparator::cmpInlineAsm(const InlineAsm *L, + const InlineAsm *R) const { + // InlineAsm's are uniqued. If they are the same pointer, obviously they are + // the same, otherwise compare the fields. + if (L == R) + return 0; + if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) + return Res; + if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) + return Res; + if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) + return Res; + if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) + return Res; + if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) + return Res; + if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) + return Res; + llvm_unreachable("InlineAsm blocks were not uniqued."); + return 0; +} + +/// Compare two values used by the two functions under pair-wise comparison. If +/// this is the first time the values are seen, they're added to the mapping so +/// that we will detect mismatches on next use. +/// See comments in declaration for more details. +int FunctionComparator::cmpValues(const Value *L, const Value *R) { + // Catch self-reference case. 
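+  // (E.g. in a pair of directly recursive functions, the callee operand of
+  // the recursive call is the containing function itself; FnL on the left
+  // must then correspond to FnR on the right.)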
+ if (L == FnL) { + if (R == FnR) + return 0; + return -1; + } + if (R == FnR) { + if (L == FnL) + return 0; + return 1; + } + + const Constant *ConstL = dyn_cast<Constant>(L); + const Constant *ConstR = dyn_cast<Constant>(R); + if (ConstL && ConstR) { + if (L == R) + return 0; + return cmpConstants(ConstL, ConstR); + } + + if (ConstL) + return 1; + if (ConstR) + return -1; + + const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L); + const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); + + if (InlineAsmL && InlineAsmR) + return cmpInlineAsm(InlineAsmL, InlineAsmR); + if (InlineAsmL) + return 1; + if (InlineAsmR) + return -1; + + auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())), + RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size())); + + return cmpNumbers(LeftSN.first->second, RightSN.first->second); +} +// Test whether two basic blocks have equivalent behaviour. +int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, + const BasicBlock *BBR) { + BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); + BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); + + do { + if (int Res = cmpValues(&*InstL, &*InstR)) + return Res; + + const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL); + const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR); + + if (GEPL && !GEPR) + return 1; + if (GEPR && !GEPL) + return -1; + + if (GEPL && GEPR) { + if (int Res = + cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) + return Res; + if (int Res = cmpGEPs(GEPL, GEPR)) + return Res; + } else { + if (int Res = cmpOperations(&*InstL, &*InstR)) + return Res; + assert(InstL->getNumOperands() == InstR->getNumOperands()); + + for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { + Value *OpL = InstL->getOperand(i); + Value *OpR = InstR->getOperand(i); + if (int Res = cmpValues(OpL, OpR)) + return Res; + // cmpValues should ensure this is true. + assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); + } + } + + ++InstL, ++InstR; + } while (InstL != InstLE && InstR != InstRE); + + if (InstL != InstLE && InstR == InstRE) + return 1; + if (InstL == InstLE && InstR != InstRE) + return -1; + return 0; +} + +// Test whether the two functions have equivalent behaviour. +int FunctionComparator::compare() { + sn_mapL.clear(); + sn_mapR.clear(); + + if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) + return Res; + + if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) + return Res; + + if (FnL->hasGC()) { + if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) + return Res; + } + + if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) + return Res; + + if (FnL->hasSection()) { + if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) + return Res; + } + + if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) + return Res; + + // TODO: if it's internal and only used in direct calls, we could handle this + // case too. + if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) + return Res; + + if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType())) + return Res; + + assert(FnL->arg_size() == FnR->arg_size() && + "Identically typed functions have different numbers of args!"); + + // Visit the arguments so that they get enumerated in the order they're + // passed in. 
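+  // (This seeds sn_mapL and sn_mapR so that the i-th argument on both sides
+  // receives serial number i before any basic block is compared.)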
+ for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), + ArgRI = FnR->arg_begin(), + ArgLE = FnL->arg_end(); + ArgLI != ArgLE; ++ArgLI, ++ArgRI) { + if (cmpValues(&*ArgLI, &*ArgRI) != 0) + llvm_unreachable("Arguments repeat!"); + } + + // We do a CFG-ordered walk since the actual ordering of the blocks in the + // linked list is immaterial. Our walk starts at the entry block for both + // functions, then takes each block from each terminator in order. As an + // artifact, this also means that unreachable blocks are ignored. + SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs; + SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1. + + FnLBBs.push_back(&FnL->getEntryBlock()); + FnRBBs.push_back(&FnR->getEntryBlock()); + + VisitedBBs.insert(FnLBBs[0]); + while (!FnLBBs.empty()) { + const BasicBlock *BBL = FnLBBs.pop_back_val(); + const BasicBlock *BBR = FnRBBs.pop_back_val(); + + if (int Res = cmpValues(BBL, BBR)) + return Res; + + if (int Res = cmpBasicBlocks(BBL, BBR)) + return Res; + + const TerminatorInst *TermL = BBL->getTerminator(); + const TerminatorInst *TermR = BBR->getTerminator(); + + assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); + for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(TermL->getSuccessor(i)).second) + continue; + + FnLBBs.push_back(TermL->getSuccessor(i)); + FnRBBs.push_back(TermR->getSuccessor(i)); + } + } + return 0; +} + +namespace { +// Accumulate the hash of a sequence of 64-bit integers. This is similar to a +// hash of a sequence of 64bit ints, but the entire input does not need to be +// available at once. This interface is necessary for functionHash because it +// needs to accumulate the hash as the structure of the function is traversed +// without saving these values to an intermediate buffer. This form of hashing +// is not often needed, as usually the object to hash is just read from a +// buffer. +class HashAccumulator64 { + uint64_t Hash; +public: + // Initialize to random constant, so the state isn't zero. + HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { + Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + } + // No finishing is required, because the entire hash value is used. + uint64_t getHash() { return Hash; } +}; +} // end anonymous namespace + +// A function hash is calculated by considering only the number of arguments and +// whether a function is varargs, the order of basic blocks (given by the +// successors of each basic block in depth first order), and the order of +// opcodes of each instruction within each of these basic blocks. This mirrors +// the strategy compare() uses to compare functions by walking the BBs in depth +// first order and comparing each instruction in sequence. Because this hash +// does not look at the operands, it is insensitive to things such as the +// target of calls and the constants used in the function, which makes it useful +// when possibly merging functions which are the same modulo constants and call +// targets. +FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { + HashAccumulator64 H; + H.add(F.isVarArg()); + H.add(F.arg_size()); + + SmallVector<const BasicBlock *, 8> BBs; + SmallSet<const BasicBlock *, 16> VisitedBBs; + + // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), + // accumulating the hash of the function "structure." 
+  // (BB and opcode sequence).
+  BBs.push_back(&F.getEntryBlock());
+  VisitedBBs.insert(BBs[0]);
+  while (!BBs.empty()) {
+    const BasicBlock *BB = BBs.pop_back_val();
+    // This random value acts as a block header, as otherwise the partition
+    // of opcodes into BBs wouldn't affect the hash, only the order of the
+    // opcodes.
+    H.add(45798);
+    for (auto &Inst : *BB) {
+      H.add(Inst.getOpcode());
+    }
+    const TerminatorInst *Term = BB->getTerminator();
+    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+        continue;
+      BBs.push_back(Term->getSuccessor(i));
+    }
+  }
+  return H.getHash();
+}
+
+
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine
+/// code, by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one with a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+  static char ID;
+  MergeFunctions()
+    : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
+      HasGlobalAliases(false) {
+    initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+
+private:
+  // The function comparison operator is provided here so that FunctionNodes
+  // do not need to become larger with another pointer.
+  class FunctionNodeCmp {
+    GlobalNumberState* GlobalNumbers;
+  public:
+    FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
+    bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
+      // Order first by hashes, then full function comparison.
+      if (LHS.getHash() != RHS.getHash())
+        return LHS.getHash() < RHS.getHash();
+      FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
+      return FCmp.compare() == -1;
+    }
+  };
+  typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
+
+  GlobalNumberState GlobalNumbers;
+
+  /// A work queue of functions that may have been modified and should be
+  /// analyzed again.
+  std::vector<WeakVH> Deferred;
+
+  /// Checks the rules of the order relation introduced on the function set.
+  /// Returns true if the sanity check has been passed, and false if it
+  /// failed.
+  bool doSanityCheck(std::vector<WeakVH> &Worklist);
+
+  /// Insert a function into the FnTree, or merge it away if it's equal to
+  /// one that's already present.
+  bool insert(Function *NewFunction);
+
+  /// Remove a Function from the FnTree and queue it up for a second sweep of
+  /// analysis.
+  void remove(Function *F);
+
+  /// Find the functions that use this Value and remove them from FnTree and
+  /// queue the functions.
+  void removeUsers(Value *V);
+
+  /// Replace all direct calls of Old with calls of New. Will bitcast New if
+  /// necessary to make types match.
+  void replaceDirectCallers(Function *Old, Function *New);
+
+  /// Merge two equivalent functions. Upon completion, G may be deleted, or
+  /// may be converted into a thunk. In either case, it should never be
+  /// visited again.
+  void mergeTwoFunctions(Function *F, Function *G);
+
+  /// Replace G with a thunk or an alias to F. Deletes G.
+  void writeThunkOrAlias(Function *F, Function *G);
+
+  /// Replace G with a simple tail call to bitcast(F). Also replace direct
+  /// uses of G with bitcast(F). Deletes G.
+  void writeThunk(Function *F, Function *G);
+
+  /// Replace G with an alias to F. Deletes G.
+  void writeAlias(Function *F, Function *G);
+
+  /// Replace function F with function G in the function tree.
+ void replaceFunctionInTree(const FunctionNode &FN, Function *G); + + /// The set of all distinct functions. Use the insert() and remove() methods + /// to modify it. The map allows efficient lookup and deferring of Functions. + FnTreeType FnTree; + // Map functions to the iterators of the FunctionNode which contains them + // in the FnTree. This must be updated carefully whenever the FnTree is + // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid + // dangling iterators into FnTree. The invariant that preserves this is that + // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree. + ValueMap<Function*, FnTreeType::iterator> FNodesInTree; + + /// Whether or not the target supports global aliases. + bool HasGlobalAliases; +}; + +} // end anonymous namespace + +char MergeFunctions::ID = 0; +INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) + +ModulePass *llvm::createMergeFunctionsPass() { + return new MergeFunctions(); +} + +bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { + if (const unsigned Max = NumFunctionsForSanityCheck) { + unsigned TripleNumber = 0; + bool Valid = true; + + dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; + + unsigned i = 0; + for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); + I != E && i < Max; ++I, ++i) { + unsigned j = i; + for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { + Function *F1 = cast<Function>(*I); + Function *F2 = cast<Function>(*J); + int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); + int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare(); + + // If F1 <= F2, then F2 >= F1, otherwise report failure. + if (Res1 != -Res2) { + dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber + << "\n"; + F1->dump(); + F2->dump(); + Valid = false; + } + + if (Res1 == 0) + continue; + + unsigned k = j; + for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; + ++k, ++K, ++TripleNumber) { + if (K == J) + continue; + + Function *F3 = cast<Function>(*K); + int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare(); + int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare(); + + bool Transitive = true; + + if (Res1 != 0 && Res1 == Res4) { + // F1 > F2, F2 > F3 => F1 > F3 + Transitive = Res3 == Res1; + } else if (Res3 != 0 && Res3 == -Res4) { + // F1 > F3, F3 > F2 => F1 > F2 + Transitive = Res3 == Res1; + } else if (Res4 != 0 && -Res3 == Res4) { + // F2 > F3, F3 > F1 => F2 > F1 + Transitive = Res4 == -Res1; + } + + if (!Transitive) { + dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: " + << TripleNumber << "\n"; + dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " + << Res4 << "\n"; + F1->dump(); + F2->dump(); + F3->dump(); + Valid = false; + } + } + } + } + + dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n"; + return Valid; + } + return true; +} + +bool MergeFunctions::runOnModule(Module &M) { + bool Changed = false; + + // All functions in the module, ordered by hash. Functions with a unique + // hash value are easily eliminated. 
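+  // For example, two otherwise-identical functions that differ only in a
+  // constant operand or a call target hash identically (functionHash ignores
+  // operands) and stay in the worklist, while a function with a unique
+  // opcode/CFG shape is dropped immediately.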
+  std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
+      HashedFuncs;
+  for (Function &Func : M) {
+    if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+      HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
+    }
+  }
+
+  std::stable_sort(
+      HashedFuncs.begin(), HashedFuncs.end(),
+      [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
+         const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
+        return a.first < b.first;
+      });
+
+  auto S = HashedFuncs.begin();
+  for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
+    // If the hash value matches the previous value or the next one, we must
+    // consider merging it. Otherwise it is dropped and never considered again.
+    if ((I != S && std::prev(I)->first == I->first) ||
+        (std::next(I) != IE && std::next(I)->first == I->first)) {
+      Deferred.push_back(WeakVH(I->second));
+    }
+  }
+
+  do {
+    std::vector<WeakVH> Worklist;
+    Deferred.swap(Worklist);
+
+    DEBUG(doSanityCheck(Worklist));
+
+    DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+    DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+    // Insert only strong functions and merge them. Strong function merging
+    // always deletes one of them.
+    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+           E = Worklist.end(); I != E; ++I) {
+      if (!*I) continue;
+      Function *F = cast<Function>(*I);
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          !F->mayBeOverridden()) {
+        Changed |= insert(F);
+      }
+    }
+
+    // Insert only weak functions and merge them. Handling these second lets
+    // us create thunks to the strong function when possible. When two weak
+    // functions are identical, we create a new strong function with two weak
+    // thunks to it which are identical but not mergeable.
+    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+           E = Worklist.end(); I != E; ++I) {
+      if (!*I) continue;
+      Function *F = cast<Function>(*I);
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          F->mayBeOverridden()) {
+        Changed |= insert(F);
+      }
+    }
+    DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
+  } while (!Deferred.empty());
+
+  FnTree.clear();
+  GlobalNumbers.clear();
+
+  return Changed;
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+  Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+  for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
+    Use *U = &*UI;
+    ++UI;
+    CallSite CS(U->getUser());
+    if (CS && CS.isCallee(U)) {
+      // Transfer the called function's attributes to the call site. Due to the
+      // bitcast we will 'lose' ABI changing attributes because the 'called
+      // function' is no longer a Function* but the bitcast. Code that looks up
+      // the attributes from the called function will fail.
+
+      // FIXME: This is not actually true, at least not anymore. The call site
+      // will always have the same ABI affecting attributes as the callee,
+      // because otherwise the original input has UB. Note that Old and New
+      // always have matching ABI, so no attributes need to be changed.
+      // Transferring other attributes may help other optimizations, but that
+      // should be done uniformly and not in this ad-hoc way.
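+      // The code below therefore re-applies New's return and parameter
+      // attributes to the rewritten call site. (E.g., a zeroext parameter
+      // attribute on New would otherwise be invisible through the bitcast.)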
+      auto &Context = New->getContext();
+      auto NewFuncAttrs = New->getAttributes();
+      auto CallSiteAttrs = CS.getAttributes();
+
+      CallSiteAttrs = CallSiteAttrs.addAttributes(
+          Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
+
+      for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
+        AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
+        if (Attrs.getNumSlots())
+          CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
+      }
+
+      CS.setAttributes(CallSiteAttrs);
+
+      remove(CS.getInstruction()->getParent()->getParent());
+      U->set(BitcastNew);
+    }
+  }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+  if (HasGlobalAliases && G->hasUnnamedAddr()) {
+    if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+        G->hasWeakLinkage()) {
+      writeAlias(F, G);
+      return;
+    }
+  }
+
+  writeThunk(F, G);
+}
+
+// Helper for writeThunk: selects the proper cast operation. A bit simpler
+// than CastInst::getCastOpcode.
+static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+  Type *SrcTy = V->getType();
+  if (SrcTy->isStructTy()) {
+    assert(DestTy->isStructTy());
+    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+    Value *Result = UndefValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+      Value *Element = createCast(
+          Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
+          DestTy->getStructElementType(I));
+
+      Result =
+          Builder.CreateInsertValue(Result, Element, makeArrayRef(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isStructTy());
+  if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+    return Builder.CreateIntToPtr(V, DestTy);
+  else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+    return Builder.CreatePtrToInt(V, DestTy);
+  else
+    return Builder.CreateBitCast(V, DestTy);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
+void MergeFunctions::writeThunk(Function *F, Function *G) {
+  if (!G->mayBeOverridden()) {
+    // Redirect direct callers of G to F.
+    replaceDirectCallers(G, F);
+  }
+
+  // If G was internal then we may have replaced all uses of G with F. If so,
+  // stop here and delete G. There's no need for a thunk.
+  if (G->hasLocalLinkage() && G->use_empty()) {
+    G->eraseFromParent();
+    return;
+  }
+
+  Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+                                    G->getParent());
+  BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+  IRBuilder<false> Builder(BB);
+
+  SmallVector<Value *, 16> Args;
+  unsigned i = 0;
+  FunctionType *FFTy = F->getFunctionType();
+  for (Argument &AI : NewG->args()) {
+    Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
+    ++i;
+  }
+
+  CallInst *CI = Builder.CreateCall(F, Args);
+  CI->setTailCall();
+  CI->setCallingConv(F->getCallingConv());
+  CI->setAttributes(F->getAttributes());
+  if (NewG->getReturnType()->isVoidTy()) {
+    Builder.CreateRetVoid();
+  } else {
+    Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
+  }
+
+  NewG->copyAttributesFrom(G);
+  NewG->takeName(G);
+  removeUsers(G);
+  G->replaceAllUsesWith(NewG);
+  G->eraseFromParent();
+
+  DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+  ++NumThunksWritten;
+}
+
+// Replace G with an alias to F and delete G.
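+// Conceptually, where writeThunk emits a real body (a tail call through a
+// bitcast of F), writeAlias only emits a GlobalAlias symbol for G that
+// resolves to F at link time, so no extra code is generated.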
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+  auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
+  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+  GA->takeName(G);
+  GA->setVisibility(G->getVisibility());
+  removeUsers(G);
+  G->replaceAllUsesWith(GA);
+  G->eraseFromParent();
+
+  DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+  ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+  if (F->mayBeOverridden()) {
+    assert(G->mayBeOverridden());
+
+    // Make them both thunks to the same internal function.
+    Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+                                   F->getParent());
+    H->copyAttributesFrom(F);
+    H->takeName(F);
+    removeUsers(F);
+    F->replaceAllUsesWith(H);
+
+    unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+
+    if (HasGlobalAliases) {
+      writeAlias(F, G);
+      writeAlias(F, H);
+    } else {
+      writeThunk(F, G);
+      writeThunk(F, H);
+    }
+
+    F->setAlignment(MaxAlignment);
+    F->setLinkage(GlobalValue::PrivateLinkage);
+    ++NumDoubleWeak;
+  } else {
+    writeThunkOrAlias(F, G);
+  }
+
+  ++NumFunctionsMerged;
+}
+
+/// Replace function F by function G.
+void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
+                                           Function *G) {
+  Function *F = FN.getFunc();
+  assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
+         "The two functions must be equal");
+
+  auto I = FNodesInTree.find(F);
+  assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
+  assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
+
+  FnTreeType::iterator IterToFNInFnTree = I->second;
+  assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
+  // Remove F -> FN and insert G -> FN.
+  FNodesInTree.erase(I);
+  FNodesInTree.insert({G, IterToFNInFnTree});
+  // Replace F with G in FN, which is stored inside the FnTree.
+  FN.replaceBy(G);
+}
+
+// Insert a ComparableFunction into the FnTree, or merge it away if equal to
+// one that was already inserted.
+bool MergeFunctions::insert(Function *NewFunction) {
+  std::pair<FnTreeType::iterator, bool> Result =
+      FnTree.insert(FunctionNode(NewFunction));
+
+  if (Result.second) {
+    assert(FNodesInTree.count(NewFunction) == 0);
+    FNodesInTree.insert({NewFunction, Result.first});
+    DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
+    return false;
+  }
+
+  const FunctionNode &OldF = *Result.first;
+
+  // Don't merge tiny functions, since merging can just end up making the
+  // function larger.
+  // FIXME: Should still merge them if they are unnamed_addr and produce an
+  // alias.
+  if (NewFunction->size() == 1) {
+    if (NewFunction->front().size() <= 2) {
+      DEBUG(dbgs() << NewFunction->getName()
+                   << " is too small to bother merging\n");
+      return false;
+    }
+  }
+
+  // Impose a total order (by name) on the replacement of functions. This is
+  // important when operating on more than one module independently to prevent
+  // cycles of thunks calling each other when the modules are linked together.
+  //
+  // When one function is weak and the other is strong there is an order
+  // imposed already. We process strong functions before weak functions.
+  if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) ||
+      (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden()))
+    if (OldF.getFunc()->getName() > NewFunction->getName()) {
+      // Swap the two functions.
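+      // E.g. if "b" is already in the tree and an identical "a" arrives, "a"
+      // takes b's place as the canonical copy and "b" becomes the one merged
+      // away, so the survivor is the same regardless of insertion order.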
+      Function *F = OldF.getFunc();
+      replaceFunctionInTree(*Result.first, NewFunction);
+      NewFunction = F;
+      assert(OldF.getFunc() != F && "Must have swapped the functions.");
+    }
+
+  // Never thunk a strong function to a weak function.
+  assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden());
+
+  DEBUG(dbgs() << "  " << OldF.getFunc()->getName()
+               << " == " << NewFunction->getName() << '\n');
+
+  Function *DeleteF = NewFunction;
+  mergeTwoFunctions(OldF.getFunc(), DeleteF);
+  return true;
+}
+
+// Remove a function from FnTree. If it was already in FnTree, add
+// it to Deferred so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+  auto I = FNodesInTree.find(F);
+  if (I != FNodesInTree.end()) {
+    DEBUG(dbgs() << "Deferred " << F->getName() << ".\n");
+    FnTree.erase(I->second);
+    // I->second has been invalidated, remove it from the FNodesInTree map to
+    // preserve the invariant.
+    FNodesInTree.erase(I);
+    Deferred.emplace_back(F);
+  }
+}
+
+// For each instruction used by the value, remove() the function that contains
+// the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+  std::vector<Value *> Worklist;
+  Worklist.push_back(V);
+  SmallSet<Value*, 8> Visited;
+  Visited.insert(V);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.back();
+    Worklist.pop_back();
+
+    for (User *U : V->users()) {
+      if (Instruction *I = dyn_cast<Instruction>(U)) {
+        remove(I->getParent()->getParent());
+      } else if (isa<GlobalValue>(U)) {
+        // do nothing
+      } else if (Constant *C = dyn_cast<Constant>(U)) {
+        // Queue each user of the constant the first time we see it.
+        for (User *UU : C->users()) {
+          if (Visited.insert(UU).second)
+            Worklist.push_back(UU);
+        }
+      }
+    }
+  }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 0000000..0c5c84b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,183 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining, typically by inlining an if statement
+// that surrounds the body of the function.
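+// For example, a function shaped like
+//
+//   void f() { if (cond) return; /* large body */ }
+//
+// is split so that the cheap entry test (and the early return) can be inlined
+// into every caller, while the large body is extracted into a separate
+// function that is only called when control falls through the test.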
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" +using namespace llvm; + +#define DEBUG_TYPE "partialinlining" + +STATISTIC(NumPartialInlined, "Number of functions partially inlined"); + +namespace { + struct PartialInliner : public ModulePass { + void getAnalysisUsage(AnalysisUsage &AU) const override { } + static char ID; // Pass identification, replacement for typeid + PartialInliner() : ModulePass(ID) { + initializePartialInlinerPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module& M) override; + + private: + Function* unswitchFunction(Function* F); + }; +} + +char PartialInliner::ID = 0; +INITIALIZE_PASS(PartialInliner, "partial-inliner", + "Partial Inliner", false, false) + +ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } + +Function* PartialInliner::unswitchFunction(Function* F) { + // First, verify that this function is an unswitching candidate... + BasicBlock *entryBlock = &F->front(); + BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); + if (!BR || BR->isUnconditional()) + return nullptr; + + BasicBlock* returnBlock = nullptr; + BasicBlock* nonReturnBlock = nullptr; + unsigned returnCount = 0; + for (BasicBlock *BB : successors(entryBlock)) { + if (isa<ReturnInst>(BB->getTerminator())) { + returnBlock = BB; + returnCount++; + } else + nonReturnBlock = BB; + } + + if (returnCount != 1) + return nullptr; + + // Clone the function, so that we can hack away on it. + ValueToValueMapTy VMap; + Function* duplicateFunction = CloneFunction(F, VMap, + /*ModuleLevelChanges=*/false); + duplicateFunction->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(duplicateFunction); + BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); + BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]); + BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]); + + // Go ahead and update all uses to the duplicate, so that we can just + // use the inliner functionality when we're done hacking. + F->replaceAllUsesWith(duplicateFunction); + + // Special hackery is needed with PHI nodes that have inputs from more than + // one extracted block. For simplicity, just split the PHIs into a two-level + // sequence of PHIs, some of which will go in the extracted region, and some + // of which will go outside. + BasicBlock* preReturn = newReturnBlock; + newReturnBlock = newReturnBlock->splitBasicBlock( + newReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock::iterator I = preReturn->begin(); + Instruction *Ins = &newReturnBlock->front(); + while (I != preReturn->end()) { + PHINode* OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) break; + + PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); + OldPhi->replaceAllUsesWith(retPhi); + Ins = newReturnBlock->getFirstNonPHI(); + + retPhi->addIncoming(&*I, preReturn); + retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), + newEntryBlock); + OldPhi->removeIncomingValue(newEntryBlock); + + ++I; + } + newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock); + + // Gather up the blocks that we're going to extract. 
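+  // That is, every block of the clone except the duplicated entry block and
+  // the (split) return block: the entire non-returning side of the branch.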
+ std::vector<BasicBlock*> toExtract; + toExtract.push_back(newNonReturnBlock); + for (Function::iterator FI = duplicateFunction->begin(), + FE = duplicateFunction->end(); FI != FE; ++FI) + if (&*FI != newEntryBlock && &*FI != newReturnBlock && + &*FI != newNonReturnBlock) + toExtract.push_back(&*FI); + + // The CodeExtractor needs a dominator tree. + DominatorTree DT; + DT.recalculate(*duplicateFunction); + + // Extract the body of the if. + Function* extractedFunction + = CodeExtractor(toExtract, &DT).extractCodeRegion(); + + InlineFunctionInfo IFI; + + // Inline the top-level if test into all callers. + std::vector<User *> Users(duplicateFunction->user_begin(), + duplicateFunction->user_end()); + for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); + UI != UE; ++UI) + if (CallInst *CI = dyn_cast<CallInst>(*UI)) + InlineFunction(CI, IFI); + else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) + InlineFunction(II, IFI); + + // Ditch the duplicate, since we're done with it, and rewrite all remaining + // users (function pointers, etc.) back to the original function. + duplicateFunction->replaceAllUsesWith(F); + duplicateFunction->eraseFromParent(); + + ++NumPartialInlined; + + return extractedFunction; +} + +bool PartialInliner::runOnModule(Module& M) { + std::vector<Function*> worklist; + worklist.reserve(M.size()); + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) + if (!FI->use_empty() && !FI->isDeclaration()) + worklist.push_back(&*FI); + + bool changed = false; + while (!worklist.empty()) { + Function* currFunc = worklist.back(); + worklist.pop_back(); + + if (currFunc->use_empty()) continue; + + bool recursive = false; + for (User *U : currFunc->users()) + if (Instruction* I = dyn_cast<Instruction>(U)) + if (I->getParent()->getParent() == currFunc) { + recursive = true; + break; + } + if (recursive) continue; + + + if (Function* newFunc = unswitchFunction(currFunc)) { + worklist.push_back(newFunc); + changed = true; + } + + } + + return changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp new file mode 100644 index 0000000..9876efa --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -0,0 +1,733 @@ +//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. 
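+//
+// A typical use looks roughly like this (sketch only; see the builder's
+// fields and populate*() methods below):
+//
+//   PassManagerBuilder PMB;
+//   PMB.OptLevel = 2;
+//   PMB.Inliner = createFunctionInliningPass();
+//   legacy::PassManager MPM;
+//   PMB.populateModulePassManager(MPM);
+//   MPM.run(M);                        // M is some llvm::Module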
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm-c/Transforms/PassManagerBuilder.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/FunctionInfo.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Vectorize.h" + +using namespace llvm; + +static cl::opt<bool> +RunLoopVectorization("vectorize-loops", cl::Hidden, + cl::desc("Run the Loop vectorization passes")); + +static cl::opt<bool> +RunSLPVectorization("vectorize-slp", cl::Hidden, + cl::desc("Run the SLP vectorization passes")); + +static cl::opt<bool> +RunBBVectorization("vectorize-slp-aggressive", cl::Hidden, + cl::desc("Run the BB vectorization passes")); + +static cl::opt<bool> +UseGVNAfterVectorization("use-gvn-after-vectorization", + cl::init(false), cl::Hidden, + cl::desc("Run GVN instead of Early CSE after vectorization passes")); + +static cl::opt<bool> ExtraVectorizerPasses( + "extra-vectorizer-passes", cl::init(false), cl::Hidden, + cl::desc("Run cleanup optimization passes after vectorization.")); + +static cl::opt<bool> UseNewSROA("use-new-sroa", + cl::init(true), cl::Hidden, + cl::desc("Enable the new, experimental SROA pass")); + +static cl::opt<bool> +RunLoopRerolling("reroll-loops", cl::Hidden, + cl::desc("Run the loop rerolling pass")); + +static cl::opt<bool> +RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), + cl::desc("Run the float2int (float demotion) pass")); + +static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), + cl::Hidden, + cl::desc("Run the load combining pass")); + +static cl::opt<bool> +RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", + cl::init(true), cl::Hidden, + cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " + "vectorizer instead of before")); + +static cl::opt<bool> UseCFLAA("use-cfl-aa", + cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis")); + +static cl::opt<bool> +EnableMLSM("mlsm", cl::init(true), cl::Hidden, + cl::desc("Enable motion of merged load and store")); + +static cl::opt<bool> EnableLoopInterchange( + "enable-loopinterchange", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopInterchange Pass")); + +static cl::opt<bool> EnableLoopDistribute( + "enable-loop-distribute", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopDistribution Pass")); + +static cl::opt<bool> EnableNonLTOGlobalsModRef( + "enable-non-lto-gmr", cl::init(true), cl::Hidden, + cl::desc( + "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); + +static cl::opt<bool> EnableLoopLoadElim( + "enable-loop-load-elim", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopLoadElimination Pass")); + +PassManagerBuilder::PassManagerBuilder() { + OptLevel = 
+      2;
+  SizeLevel = 0;
+  LibraryInfo = nullptr;
+  Inliner = nullptr;
+  FunctionIndex = nullptr;
+  DisableUnitAtATime = false;
+  DisableUnrollLoops = false;
+  BBVectorize = RunBBVectorization;
+  SLPVectorize = RunSLPVectorization;
+  LoopVectorize = RunLoopVectorization;
+  RerollLoops = RunLoopRerolling;
+  LoadCombine = RunLoadCombine;
+  DisableGVNLoadPRE = false;
+  VerifyInput = false;
+  VerifyOutput = false;
+  MergeFunctions = false;
+  PrepareForLTO = false;
+}
+
+PassManagerBuilder::~PassManagerBuilder() {
+  delete LibraryInfo;
+  delete Inliner;
+}
+
+/// Set of global extensions, automatically added as part of the standard set.
+static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
+   PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+
+void PassManagerBuilder::addGlobalExtension(
+    PassManagerBuilder::ExtensionPointTy Ty,
+    PassManagerBuilder::ExtensionFn Fn) {
+  GlobalExtensions->push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
+  Extensions.push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
+                                           legacy::PassManagerBase &PM) const {
+  for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
+    if ((*GlobalExtensions)[i].first == ETy)
+      (*GlobalExtensions)[i].second(*this, PM);
+  for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
+    if (Extensions[i].first == ETy)
+      Extensions[i].second(*this, PM);
+}
+
+void PassManagerBuilder::addInitialAliasAnalysisPasses(
+    legacy::PassManagerBase &PM) const {
+  // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+  // BasicAliasAnalysis wins if they disagree. This is intended to help
+  // support "obvious" type-punning idioms.
+  if (UseCFLAA)
+    PM.add(createCFLAAWrapperPass());
+  PM.add(createTypeBasedAAWrapperPass());
+  PM.add(createScopedNoAliasAAWrapperPass());
+}
+
+void PassManagerBuilder::populateFunctionPassManager(
+    legacy::FunctionPassManager &FPM) {
+  addExtensionsToPM(EP_EarlyAsPossible, FPM);
+
+  // Add LibraryInfo if we have some.
+  if (LibraryInfo)
+    FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
+
+  if (OptLevel == 0) return;
+
+  addInitialAliasAnalysisPasses(FPM);
+
+  FPM.add(createCFGSimplificationPass());
+  if (UseNewSROA)
+    FPM.add(createSROAPass());
+  else
+    FPM.add(createScalarReplAggregatesPass());
+  FPM.add(createEarlyCSEPass());
+  FPM.add(createLowerExpectIntrinsicPass());
+}
+
+void PassManagerBuilder::populateModulePassManager(
+    legacy::PassManagerBase &MPM) {
+  // Allow forcing function attributes as a debugging and tuning aid.
+  MPM.add(createForceFunctionAttrsLegacyPass());
+
+  // If all optimizations are disabled, just run the always-inline pass and,
+  // if enabled, the function merging pass.
+  if (OptLevel == 0) {
+    if (Inliner) {
+      MPM.add(Inliner);
+      Inliner = nullptr;
+    }
+
+    // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
+    // creates a CGSCC pass manager, but we don't want to add extensions into
+    // that pass manager. To prevent this we insert a no-op module pass to
+    // reset the pass manager to get the same behavior as EP_OptimizerLast in
+    // non-O0 builds. The function merging pass, being a module pass itself,
+    // serves the same purpose here when it is enabled.
+    if (MergeFunctions)
+      MPM.add(createMergeFunctionsPass());
+    else if (!GlobalExtensions->empty() || !Extensions.empty())
+      MPM.add(createBarrierNoopPass());
+
+    addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+    return;
+  }
+
+  // Add LibraryInfo if we have some.
+ if (LibraryInfo) + MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (!DisableUnitAtATime) { + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); + + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars + MPM.add(createPromoteMemoryToRegisterPass()); + + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + } + + if (EnableNonLTOGlobalsModRef) + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + + // Start of CallGraph SCC passes. + if (!DisableUnitAtATime) + MPM.add(createPruneEHPass()); // Remove dead EH info + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + } + if (!DisableUnitAtATime) + MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + // Start of function pass. + // Break up aggregate allocas, using SSAUpdater. + if (UseNewSROA) + MPM.add(createSROAPass()); + else + MPM.add(createScalarReplAggregatesPass(-1, false)); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); + + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createCFGSimplificationPass()); + MPM.add(createInstructionCombiningPass()); + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + if (EnableLoopInterchange) { + MPM.add(createLoopInterchangePass()); // Interchange loops + MPM.add(createCFGSimplificationPass()); + } + if (!DisableUnrollLoops) + MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops + addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + + if (OptLevel > 1) { + if (EnableMLSM) + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + } + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + + // Delete dead bit computations (instcombine runs after to fold away the dead + // computations, and then ADCE will run later to exploit any new DCE + // opportunities that creates). 
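+  // (BDCE deletes instructions whose result bits are never demanded by any
+  // user, e.g. work feeding only bits that a later mask or truncate discards.)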
+ MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createLICMPass()); + + addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + + if (RerollLoops) + MPM.add(createLoopRerollPass()); + if (!RunSLPAfterLoopVectorization) { + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + if (BBVectorize) { + MPM.add(createBBVectorizePass()); + MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); + } + } + + if (LoadCombine) + MPM.add(createLoadCombinePass()); + + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Clean up after everything. + addExtensionsToPM(EP_Peephole, MPM); + + // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC + // pass manager that we are specifically trying to avoid. To prevent this + // we must insert a no-op module pass to reset the pass manager. + MPM.add(createBarrierNoopPass()); + + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE later, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead + // and saves running remaining passes on the eliminated functions. + MPM.add(createEliminateAvailableExternallyPass()); + } + + if (EnableNonLTOGlobalsModRef) + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). 
+    MPM.add(createGlobalsAAWrapperPass());
+
+  if (RunFloat2Int)
+    MPM.add(createFloat2IntPass());
+
+  addExtensionsToPM(EP_VectorizerStart, MPM);
+
+  // Re-rotate loops in all our loop nests. These may have fallen out of
+  // rotated form due to GVN or other transformations, and the vectorizer
+  // relies on the rotated form. Disable header duplication at -Oz.
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+
+  // Distribute loops to allow partial vectorization. I.e. isolate dependences
+  // into a separate loop that would otherwise inhibit vectorization.
+  if (EnableLoopDistribute)
+    MPM.add(createLoopDistributePass());
+
+  MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+
+  // Eliminate loads by forwarding stores from the previous iteration to loads
+  // of the current iteration.
+  if (EnableLoopLoadElim)
+    MPM.add(createLoopLoadEliminationPass());
+
+  // FIXME: Because of #pragma vectorize enable, the passes below are always
+  // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
+  // on -O1 and no #pragma is found). Would be good to have these two passes
+  // as function calls, so that we can only pass them when the vectorizer
+  // changed the code.
+  MPM.add(createInstructionCombiningPass());
+  if (OptLevel > 1 && ExtraVectorizerPasses) {
+    // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+    // runtime checks for two inner loops in the same outer loop, fold any
+    // common computations, hoist loop-invariant aspects out of any outer loop,
+    // and unswitch the runtime checks if possible. Once hoisted, we may have
+    // dead (or speculatable) control flows or more combining opportunities.
+    MPM.add(createEarlyCSEPass());
+    MPM.add(createCorrelatedValuePropagationPass());
+    MPM.add(createInstructionCombiningPass());
+    MPM.add(createLICMPass());
+    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+    MPM.add(createCFGSimplificationPass());
+    MPM.add(createInstructionCombiningPass());
+  }
+
+  if (RunSLPAfterLoopVectorization) {
+    if (SLPVectorize) {
+      MPM.add(createSLPVectorizerPass());   // Vectorize parallel scalar chains.
+      if (OptLevel > 1 && ExtraVectorizerPasses) {
+        MPM.add(createEarlyCSEPass());
+      }
+    }
+
+    if (BBVectorize) {
+      MPM.add(createBBVectorizePass());
+      MPM.add(createInstructionCombiningPass());
+      addExtensionsToPM(EP_Peephole, MPM);
+      if (OptLevel > 1 && UseGVNAfterVectorization)
+        MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+      else
+        MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
+
+      // BBVectorize may have significantly shortened a loop body; unroll
+      // again.
+      if (!DisableUnrollLoops)
+        MPM.add(createLoopUnrollPass());
+    }
+  }
+
+  addExtensionsToPM(EP_Peephole, MPM);
+  MPM.add(createCFGSimplificationPass());
+  MPM.add(createInstructionCombiningPass());
+
+  if (!DisableUnrollLoops) {
+    MPM.add(createLoopUnrollPass());    // Unroll small loops
+
+    // LoopUnroll may generate some redundancy to clean up.
+    MPM.add(createInstructionCombiningPass());
+
+    // Runtime unrolling will introduce a runtime check in the loop prologue.
+    // If the unrolled loop is an inner loop, then the prologue will be inside
+    // the outer loop. The LICM pass can help to promote the runtime check out
+    // if the checked value is loop invariant.
+    MPM.add(createLICMPass());
+  }
+
+  // After vectorization and unrolling, assume intrinsics may tell us more
+  // about pointer alignments.
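+  // (This pass pattern-matches llvm.assume calls that encode alignment facts
+  // and raises the alignment on the corresponding loads and stores.)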
+ MPM.add(createAlignmentFromAssumptionsPass()); + + if (!DisableUnitAtATime) { + // FIXME: We shouldn't bother with this anymore. + MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O2 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + if (OptLevel > 1) { + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants + } + } + + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + + addExtensionsToPM(EP_OptimizerLast, MPM); +} + +void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { + // Provide AliasAnalysis services for optimizations. + addInitialAliasAnalysisPasses(PM); + + if (FunctionIndex) + PM.add(createFunctionImportPass(FunctionIndex)); + + // Allow forcing function attributes as a debugging and tuning aid. + PM.add(createForceFunctionAttrsLegacyPass()); + + // Infer attributes about declarations if possible. + PM.add(createInferFunctionAttrsLegacyPass()); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + + // Now that we internalized some globals, see if we can hack on them! + PM.add(createFunctionAttrsPass()); // Add norecurse if possible. + PM.add(createGlobalOptimizerPass()); + // Promote any localized global vars. + PM.add(createPromoteMemoryToRegisterPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant. + PM.add(createConstantMergePass()); + + // Remove unused arguments from functions. + PM.add(createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); + + // Inline small functions + bool RunInliner = Inliner; + if (RunInliner) { + PM.add(Inliner); + Inliner = nullptr; + } + + PM.add(createPruneEHPass()); // Remove dead EH info. + + // Optimize globals again if we ran the inliner. + if (RunInliner) + PM.add(createGlobalOptimizerPass()); + PM.add(createGlobalDCEPass()); // Remove dead functions. + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + PM.add(createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); + PM.add(createJumpThreadingPass()); + + // Break up allocas + if (UseNewSROA) + PM.add(createSROAPass()); + else + PM.add(createScalarReplAggregatesPass()); + + // Run a few AA driven optimizations here and now, to cleanup the code. + PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. + + PM.add(createLICMPass()); // Hoist loop invariants. + if (EnableMLSM) + PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. + PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. + + // Nuke dead stores. 
+ PM.add(createDeadStoreEliminationPass()); + + // More loops are countable; try to optimize them. + PM.add(createIndVarSimplifyPass()); + PM.add(createLoopDeletionPass()); + if (EnableLoopInterchange) + PM.add(createLoopInterchangePass()); + + PM.add(createLoopVectorizePass(true, LoopVectorize)); + + // Now that we've optimized loops (in particular loop induction variables), + // we may have exposed more scalar opportunities. Run parts of the scalar + // optimizer again at this point. + PM.add(createInstructionCombiningPass()); // Initial cleanup + PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createSCCPPass()); // Propagate exposed constants + PM.add(createInstructionCombiningPass()); // Clean up again + PM.add(createBitTrackingDCEPass()); + + // More scalar chains could be vectorized due to more alias information + if (RunSLPAfterLoopVectorization) + if (SLPVectorize) + PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + // After vectorization, assume intrinsics may tell us more about pointer + // alignments. + PM.add(createAlignmentFromAssumptionsPass()); + + if (LoadCombine) + PM.add(createLoadCombinePass()); + + // Cleanup and simplify the code after the scalar optimizations. + PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); + + PM.add(createJumpThreadingPass()); +} + +void PassManagerBuilder::addLateLTOOptimizationPasses( + legacy::PassManagerBase &PM) { + // Delete basic blocks, which optimization passes may have killed. + PM.add(createCFGSimplificationPass()); + + // Drop bodies of available externally objects to improve GlobalDCE. + PM.add(createEliminateAvailableExternallyPass()); + + // Now that we have optimized the program, discard unreachable functions. + PM.add(createGlobalDCEPass()); + + // FIXME: this is profitable (for compiler time) to do at -O0 too, but + // currently it damages debug info. + if (MergeFunctions) + PM.add(createMergeFunctionsPass()); +} + +void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { + if (LibraryInfo) + PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + if (VerifyInput) + PM.add(createVerifierPass()); + + if (OptLevel > 1) + addLTOOptimizationPasses(PM); + + // Create a function that performs CFI checks for cross-DSO calls with targets + // in the current module. + PM.add(createCrossDSOCFIPass()); + + // Lower bit sets to globals. This pass supports Clang's control flow + // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI + // is enabled. The pass does nothing if CFI is disabled. 
+  PM.add(createLowerBitSetsPass());
+
+  if (OptLevel != 0)
+    addLateLTOOptimizationPasses(PM);
+
+  if (VerifyOutput)
+    PM.add(createVerifierPass());
+}
+
+inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
+  return reinterpret_cast<PassManagerBuilder*>(P);
+}
+
+inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
+  return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
+}
+
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
+  PassManagerBuilder *PMB = new PassManagerBuilder();
+  return wrap(PMB);
+}
+
+void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  delete Builder;
+}
+
+void
+LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
+                                  unsigned OptLevel) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->OptLevel = OptLevel;
+}
+
+void
+LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
+                                   unsigned SizeLevel) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->SizeLevel = SizeLevel;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
+                                            LLVMBool Value) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->DisableUnitAtATime = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
+                                            LLVMBool Value) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->DisableUnrollLoops = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
+                                                 LLVMBool Value) {
+  // NOTE: The simplify-libcalls pass has been removed.
+}
+
+void
+LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
+                                              unsigned Threshold) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->Inliner = createFunctionInliningPass(Threshold);
+}
+
+void
+LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
+                                                  LLVMPassManagerRef PM) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
+  Builder->populateFunctionPassManager(*FPM);
+}
+
+void
+LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
+                                                LLVMPassManagerRef PM) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  legacy::PassManagerBase *MPM = unwrap(PM);
+  Builder->populateModulePassManager(*MPM);
+}
+
+void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
+                                                  LLVMPassManagerRef PM,
+                                                  LLVMBool Internalize,
+                                                  LLVMBool RunInliner) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  legacy::PassManagerBase *LPM = unwrap(PM);
+
+  // A small backwards compatibility hack. populateLTOPassManager used to take
+  // a RunInliner option.
+  if (RunInliner && !Builder->Inliner)
+    Builder->Inliner = createFunctionInliningPass();
+
+  Builder->populateLTOPassManager(*LPM);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
new file mode 100644
index 0000000..3af4afb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -0,0 +1,273 @@
+//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements a simple interprocedural pass which walks the +// call-graph, turning invoke instructions into calls, iff the callee cannot +// throw an exception, and marking functions 'nounwind' if they cannot throw. +// It implements this as a bottom-up traversal of the call-graph. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "prune-eh" + +STATISTIC(NumRemoved, "Number of invokes removed"); +STATISTIC(NumUnreach, "Number of noreturn calls optimized"); + +namespace { + struct PruneEH : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + PruneEH() : CallGraphSCCPass(ID) { + initializePruneEHPass(*PassRegistry::getPassRegistry()); + } + + // runOnSCC - Analyze the SCC, performing the transformation if possible. + bool runOnSCC(CallGraphSCC &SCC) override; + + bool SimplifyFunction(Function *F); + void DeleteBasicBlock(BasicBlock *BB); + }; +} + +char PruneEH::ID = 0; +INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) + +Pass *llvm::createPruneEHPass() { return new PruneEH(); } + + +bool PruneEH::runOnSCC(CallGraphSCC &SCC) { + SmallPtrSet<CallGraphNode *, 8> SCCNodes; + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + bool MadeChange = false; + + // Fill SCCNodes with the elements of the SCC. Used for quickly + // looking up whether a given CallGraphNode is in this SCC. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert(*I); + + // First pass, scan all of the functions in the SCC, simplifying them + // according to what we know. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (Function *F = (*I)->getFunction()) + MadeChange |= SimplifyFunction(F); + + // Next, check to see if any callees might throw or if there are any external + // functions in this SCC: if so, we cannot prune any functions in this SCC. + // Definitions that are weak and not declared non-throwing might be + // overridden at linktime with something that throws, so assume that. + // If this SCC includes the unwind instruction, we KNOW it throws, so + // obviously the SCC might throw. 
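+  // For example, an SCC whose only calls are to nounwind functions inside the
+  // SCC itself can be marked nounwind, which in turn lets callers' invokes of
+  // it be rewritten into plain calls by SimplifyFunction below.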
+  //
+  bool SCCMightUnwind = false, SCCMightReturn = false;
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+       (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+    Function *F = (*I)->getFunction();
+    if (!F) {
+      SCCMightUnwind = true;
+      SCCMightReturn = true;
+    } else if (F->isDeclaration() || F->mayBeOverridden()) {
+      SCCMightUnwind |= !F->doesNotThrow();
+      SCCMightReturn |= !F->doesNotReturn();
+    } else {
+      bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
+      bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+      // Determine if we should scan for InlineAsm in a naked function as it
+      // is the only way to return without a ReturnInst. Only do this for
+      // no-inline functions as functions which may be inlined cannot
+      // meaningfully return via assembly.
+      bool CheckReturnViaAsm = CheckReturn &&
+                               F->hasFnAttribute(Attribute::Naked) &&
+                               F->hasFnAttribute(Attribute::NoInline);
+
+      if (!CheckUnwind && !CheckReturn)
+        continue;
+
+      for (const BasicBlock &BB : *F) {
+        const TerminatorInst *TI = BB.getTerminator();
+        if (CheckUnwind && TI->mayThrow()) {
+          SCCMightUnwind = true;
+        } else if (CheckReturn && isa<ReturnInst>(TI)) {
+          SCCMightReturn = true;
+        }
+
+        for (const Instruction &I : BB) {
+          if ((!CheckUnwind || SCCMightUnwind) &&
+              (!CheckReturnViaAsm || SCCMightReturn))
+            break;
+
+          // Check to see if this function performs an unwind or calls an
+          // unwinding function.
+          if (CheckUnwind && !SCCMightUnwind && I.mayThrow()) {
+            bool InstMightUnwind = true;
+            if (const auto *CI = dyn_cast<CallInst>(&I)) {
+              if (Function *Callee = CI->getCalledFunction()) {
+                CallGraphNode *CalleeNode = CG[Callee];
+                // If the callee is outside our current SCC then we may throw
+                // because it might. If it is inside, do nothing.
+                if (SCCNodes.count(CalleeNode) > 0)
+                  InstMightUnwind = false;
+              }
+            }
+            SCCMightUnwind |= InstMightUnwind;
+          }
+          if (CheckReturnViaAsm && !SCCMightReturn)
+            if (auto ICS = ImmutableCallSite(&I))
+              if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue()))
+                if (IA->hasSideEffects())
+                  SCCMightReturn = true;
+        }
+
+        if (SCCMightUnwind && SCCMightReturn)
+          break;
+      }
+    }
+  }
+
+  // If the SCC doesn't unwind or doesn't return, note this fact.
+  if (!SCCMightUnwind || !SCCMightReturn)
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+      Function *F = (*I)->getFunction();
+
+      if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
+        F->addFnAttr(Attribute::NoUnwind);
+        MadeChange = true;
+      }
+
+      if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) {
+        F->addFnAttr(Attribute::NoReturn);
+        MadeChange = true;
+      }
+    }
+
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    // Convert any invokes of non-throwing functions in this node into call
+    // instructions with a branch. This makes the exception blocks dead.
+    if (Function *F = (*I)->getFunction())
+      MadeChange |= SimplifyFunction(F);
+  }
+
+  return MadeChange;
+}
+
+
+// SimplifyFunction - Given information about callees, simplify the specified
+// function if we have invokes to non-unwinding functions or code after calls
+// to no-return functions.
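+// For instance (rough IR sketch), once @callee is known nounwind,
+//   invoke void @callee() to label %ok unwind label %lpad
+// becomes
+//   call void @callee()
+//   br label %ok
+// and %lpad, having lost its last predecessor, is usually deleted.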
+bool PruneEH::SimplifyFunction(Function *F) {
+  bool MadeChange = false;
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+      if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
+        SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+        SmallVector<OperandBundleDef, 1> OpBundles;
+        II->getOperandBundlesAsDefs(OpBundles);
+
+        // Insert a call instruction before the invoke.
+        CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+                                          "", II);
+        Call->takeName(II);
+        Call->setCallingConv(II->getCallingConv());
+        Call->setAttributes(II->getAttributes());
+        Call->setDebugLoc(II->getDebugLoc());
+
+        // Anything that used the value produced by the invoke instruction
+        // now uses the value produced by the call instruction. Note that we
+        // do this even for void functions and calls with no uses so that the
+        // callgraph edge is updated.
+        II->replaceAllUsesWith(Call);
+        BasicBlock *UnwindBlock = II->getUnwindDest();
+        UnwindBlock->removePredecessor(II->getParent());
+
+        // Insert a branch to the normal destination right before the
+        // invoke.
+        BranchInst::Create(II->getNormalDest(), II);
+
+        // Finally, delete the invoke instruction!
+        BB->getInstList().pop_back();
+
+        // If the unwind block is now dead, nuke it.
+        if (pred_empty(UnwindBlock))
+          DeleteBasicBlock(UnwindBlock); // Delete the dead unwind block.
+
+        ++NumRemoved;
+        MadeChange = true;
+      }
+
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+      if (CallInst *CI = dyn_cast<CallInst>(I++))
+        if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+          // This call calls a function that cannot return. Insert an
+          // unreachable instruction after it and simplify the code. Do this
+          // by splitting the BB, adding the unreachable, then deleting the
+          // new BB.
+          BasicBlock *New = BB->splitBasicBlock(I);
+
+          // Remove the uncond branch and add an unreachable.
+          BB->getInstList().pop_back();
+          new UnreachableInst(BB->getContext(), &*BB);
+
+          DeleteBasicBlock(New);  // Delete the new BB.
+          MadeChange = true;
+          ++NumUnreach;
+          break;
+        }
+  }
+
+  return MadeChange;
+}
+
+/// DeleteBasicBlock - remove the specified basic block from the program,
+/// updating the callgraph to reflect any now-obsolete edges due to calls that
+/// exist in the BB.
+void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+  assert(pred_empty(BB) && "BB is not dead!");
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
+  CallGraphNode *CGN = CG[BB->getParent()];
+  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
+    --I;
+    if (CallInst *CI = dyn_cast<CallInst>(I)) {
+      if (!isa<IntrinsicInst>(I))
+        CGN->removeCallEdgeFor(CI);
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+      CGN->removeCallEdgeFor(II);
+    if (!I->use_empty())
+      I->replaceAllUsesWith(UndefValue::get(I->getType()));
+  }
+
+  // Get the list of successors of this block.
+ std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB)); + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + Succs[i]->removePredecessor(BB); + + BB->eraseFromParent(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp new file mode 100644 index 0000000..928d92e --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -0,0 +1,1265 @@ +//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SampleProfileLoader transformation. This pass +// reads a profile file generated by a sampling profiler (e.g. Linux Perf - +// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the +// profile information in the given profile. +// +// This pass generates branch weight annotations on the IR: +// +// - prof: Represents branch weights. This annotation is added to branches +// to indicate the weights of each edge coming out of the branch. +// The weight of each edge is the weight of the target block for +// that edge. The weight of a block B is computed as the maximum +// number of samples found in B. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <cctype> + +using namespace llvm; +using namespace sampleprof; + +#define DEBUG_TYPE "sample-profile" + +// Command line option to specify the file to read samples from. This is +// mainly used for debugging. 
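+//
+// For example (illustrative invocation):
+//
+//   opt -sample-profile -sample-profile-file=foo.prof foo.bc -o foo.opt.bc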
+static cl::opt<std::string> SampleProfileFile(
+    "sample-profile-file", cl::init(""), cl::value_desc("filename"),
+    cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+    "sample-profile-max-propagate-iterations", cl::init(100),
+    cl::desc("Maximum number of iterations to go through when propagating "
+             "sample block/edge weights through the CFG."));
+static cl::opt<unsigned> SampleProfileRecordCoverage(
+    "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+    cl::desc("Emit a warning if less than N% of records in the input profile "
+             "are matched to the IR."));
+static cl::opt<unsigned> SampleProfileSampleCoverage(
+    "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+    cl::desc("Emit a warning if less than N% of samples in the input profile "
+             "are matched to the IR."));
+static cl::opt<double> SampleProfileHotThreshold(
+    "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
+    cl::desc("Inlined functions that account for more than N% of all samples "
+             "collected in the parent function will be inlined again."));
+static cl::opt<double> SampleProfileGlobalHotThreshold(
+    "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
+    cl::desc("Top-level functions that account for more than N% of all "
+             "samples collected in the profile will be marked as hot for "
+             "the inliner to consider."));
+static cl::opt<double> SampleProfileGlobalColdThreshold(
+    "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
+    cl::desc("Top-level functions that account for less than N% of all "
+             "samples collected in the profile will be marked as cold for "
+             "the inliner to consider."));
+
+namespace {
+typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
+typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
+typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
+typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
+typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
+    BlockEdgeMap;
+
+/// \brief Sample profile pass.
+///
+/// This pass reads profile data from the file specified by
+/// -sample-profile-file and annotates every affected function with the
+/// profile information found in that file.
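+///
+/// For example, running with -sample-profile-check-record-coverage=90
+/// (illustrative value) emits a warning for every function in which fewer
+/// than 90% of the profile records could be matched to the IR.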
+class SampleProfileLoader : public ModulePass { +public: + // Class identification, replacement for typeinfo + static char ID; + + SampleProfileLoader(StringRef Name = SampleProfileFile) + : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(), + Samples(nullptr), Filename(Name), ProfileIsValid(false), + TotalCollectedSamples(0) { + initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); + } + + bool doInitialization(Module &M) override; + + void dump() { Reader->dump(); } + + const char *getPassName() const override { return "Sample profile pass"; } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + +protected: + bool runOnFunction(Function &F); + unsigned getFunctionLoc(Function &F); + bool emitAnnotations(Function &F); + ErrorOr<uint64_t> getInstWeight(const Instruction &I) const; + ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const; + const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const; + const FunctionSamples *findFunctionSamples(const Instruction &I) const; + bool inlineHotFunctions(Function &F); + bool emitInlineHints(Function &F); + void printEdgeWeight(raw_ostream &OS, Edge E); + void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; + void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); + bool computeBlockWeights(Function &F); + void findEquivalenceClasses(Function &F); + void findEquivalencesFor(BasicBlock *BB1, + SmallVector<BasicBlock *, 8> Descendants, + DominatorTreeBase<BasicBlock> *DomTree); + void propagateWeights(Function &F); + uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); + void buildEdges(Function &F); + bool propagateThroughEdges(Function &F); + void computeDominanceAndLoopInfo(Function &F); + unsigned getOffset(unsigned L, unsigned H) const; + void clearFunctionData(); + + /// \brief Map basic blocks to their computed weights. + /// + /// The weight of a basic block is defined to be the maximum + /// of all the instruction weights in that block. + BlockWeightMap BlockWeights; + + /// \brief Map edges to their computed weights. + /// + /// Edge weights are computed by propagating basic block weights in + /// SampleProfile::propagateWeights. + EdgeWeightMap EdgeWeights; + + /// \brief Set of visited blocks during propagation. + SmallPtrSet<const BasicBlock *, 128> VisitedBlocks; + + /// \brief Set of visited edges during propagation. + SmallSet<Edge, 128> VisitedEdges; + + /// \brief Equivalence classes for block weights. + /// + /// Two blocks BB1 and BB2 are in the same equivalence class if they + /// dominate and post-dominate each other, and they are in the same loop + /// nest. When this happens, the two blocks are guaranteed to execute + /// the same number of times. + EquivalenceClassMap EquivalenceClass; + + /// \brief Dominance, post-dominance and loop information. + std::unique_ptr<DominatorTree> DT; + std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT; + std::unique_ptr<LoopInfo> LI; + + /// \brief Predecessors for each basic block in the CFG. + BlockEdgeMap Predecessors; + + /// \brief Successors for each basic block in the CFG. + BlockEdgeMap Successors; + + /// \brief Profile reader object. + std::unique_ptr<SampleProfileReader> Reader; + + /// \brief Samples collected for the body of this function. + FunctionSamples *Samples; + + /// \brief Name of the profile file to load. 
+  StringRef Filename;
+
+  /// \brief Flag indicating whether the profile input was loaded successfully.
+  bool ProfileIsValid;
+
+  /// \brief Total number of samples collected in this profile.
+  ///
+  /// This is the sum of all the samples collected in all the functions
+  /// executed at runtime.
+  uint64_t TotalCollectedSamples;
+};
+
+class SampleCoverageTracker {
+public:
+  SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+
+  bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
+                       uint32_t Discriminator, uint64_t Samples);
+  unsigned computeCoverage(unsigned Used, unsigned Total) const;
+  unsigned countUsedRecords(const FunctionSamples *FS) const;
+  unsigned countBodyRecords(const FunctionSamples *FS) const;
+  uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
+  uint64_t countBodySamples(const FunctionSamples *FS) const;
+  void clear() {
+    SampleCoverage.clear();
+    TotalUsedSamples = 0;
+  }
+
+private:
+  typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
+  typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
+      FunctionSamplesCoverageMap;
+
+  /// Coverage map for sampling records.
+  ///
+  /// This map keeps a record of sampling records that have been matched to
+  /// an IR instruction. This is used to detect some form of staleness in
+  /// profiles (see flags -sample-profile-check-record-coverage and
+  /// -sample-profile-check-sample-coverage).
+  ///
+  /// Each entry in the map corresponds to a FunctionSamples instance. This is
+  /// another map that counts how many times the sample record at the
+  /// given location has been used.
+  FunctionSamplesCoverageMap SampleCoverage;
+
+  /// Number of samples used from the profile.
+  ///
+  /// When a sampling record is used for the first time, the samples from
+  /// that record are added to this accumulator. Coverage is later computed
+  /// based on the total number of samples available in this function and
+  /// its callsites.
+  ///
+  /// Note that this accumulator tracks samples used from a single function
+  /// and all the inlined callsites. Strictly, we should have a map of counters
+  /// keyed by FunctionSamples pointers, but these stats are cleared after
+  /// every function, so we just need to keep a single counter.
+  uint64_t TotalUsedSamples;
+};
+
+SampleCoverageTracker CoverageTracker;
+
+/// Return true if the given callsite is hot with respect to its caller.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compute the fraction
+/// of samples used by the callsite with respect to the total number of samples
+/// collected in the caller.
+///
+/// If that fraction is larger than the threshold given by
+/// SampleProfileHotThreshold, the callsite will be inlined again.
+bool callsiteIsHot(const FunctionSamples *CallerFS,
+                   const FunctionSamples *CallsiteFS) {
+  if (!CallsiteFS)
+    return false; // The callsite was not inlined in the original binary.
+
+  uint64_t ParentTotalSamples = CallerFS->getTotalSamples();
+  if (ParentTotalSamples == 0)
+    return false; // Avoid division by zero.
+
+  uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
+  if (CallsiteTotalSamples == 0)
+    return false; // Callsite is trivially cold.
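+
+  // For example, with the default SampleProfileHotThreshold of 0.1, a
+  // callsite that collected 5 samples inside a caller that collected 1000
+  // samples accounts for 0.5% of the caller's samples and is therefore hot.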
+
+  double PercentSamples =
+      (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
+  return PercentSamples >= SampleProfileHotThreshold;
+}
+
+}
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+                                            uint32_t LineOffset,
+                                            uint32_t Discriminator,
+                                            uint64_t Samples) {
+  LineLocation Loc(LineOffset, Discriminator);
+  unsigned &Count = SampleCoverage[FS][Loc];
+  bool FirstTime = (++Count == 1);
+  if (FirstTime)
+    TotalUsedSamples += Samples;
+  return FirstTime;
+}
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+  auto I = SampleCoverage.find(FS);
+
+  // The size of the coverage map for FS represents the number of records
+  // that were marked used at least once.
+  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+  // If there are inlined callsites in this function, count the records used
+  // in the respective bodies. However, do not bother counting callees with 0
+  // total samples; these are callees that were never invoked at runtime.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Count += countUsedRecords(CalleeSamples);
+  }
+
+  return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+  unsigned Count = FS->getBodySamples().size();
+
+  // Only count records in hot callsites.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Count += countBodyRecords(CalleeSamples);
+  }
+
+  return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+  uint64_t Total = 0;
+  for (const auto &I : FS->getBodySamples())
+    Total += I.second.getSamples();
+
+  // Only count samples in hot callsites.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Total += countBodySamples(CalleeSamples);
+  }
+
+  return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+                                                unsigned Total) const {
+  assert(Used <= Total &&
+         "number of used records cannot exceed the total number of records");
+  return Total > 0 ? Used * 100 / Total : 100;
+}
+
+/// Clear all the per-function data used to load samples and propagate weights.
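+///
+/// This is invoked by runOnModule before each function is visited; the
+/// block/edge maps and the global CoverageTracker are reused across
+/// functions, so any state left over from the previous function must be
+/// dropped here.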
+void SampleProfileLoader::clearFunctionData() { + BlockWeights.clear(); + EdgeWeights.clear(); + VisitedBlocks.clear(); + VisitedEdges.clear(); + EquivalenceClass.clear(); + DT = nullptr; + PDT = nullptr; + LI = nullptr; + Predecessors.clear(); + Successors.clear(); + CoverageTracker.clear(); +} + +/// \brief Returns the offset of lineno \p L to head_lineno \p H +/// +/// \param L Lineno +/// \param H Header lineno of the function +/// +/// \returns offset to the header lineno. 16 bits are used to represent offset. +/// We assume that a single function will not exceed 65535 LOC. +unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const { + return (L - H) & 0xffff; +} + +/// \brief Print the weight of edge \p E on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param E Edge to print. +void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) { + OS << "weight[" << E.first->getName() << "->" << E.second->getName() + << "]: " << EdgeWeights[E] << "\n"; +} + +/// \brief Print the equivalence class of block \p BB on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param BB Block to print. +void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS, + const BasicBlock *BB) { + const BasicBlock *Equiv = EquivalenceClass[BB]; + OS << "equivalence[" << BB->getName() + << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n"; +} + +/// \brief Print the weight of block \p BB on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param BB Block to print. +void SampleProfileLoader::printBlockWeight(raw_ostream &OS, + const BasicBlock *BB) const { + const auto &I = BlockWeights.find(BB); + uint64_t W = (I == BlockWeights.end() ? 0 : I->second); + OS << "weight[" << BB->getName() << "]: " << W << "\n"; +} + +/// \brief Get the weight for an instruction. +/// +/// The "weight" of an instruction \p Inst is the number of samples +/// collected on that instruction at runtime. To retrieve it, we +/// need to compute the line number of \p Inst relative to the start of its +/// function. We use HeaderLineno to compute the offset. We then +/// look up the samples collected for \p Inst using BodySamples. +/// +/// \param Inst Instruction to query. +/// +/// \returns the weight of \p Inst. +ErrorOr<uint64_t> +SampleProfileLoader::getInstWeight(const Instruction &Inst) const { + DebugLoc DLoc = Inst.getDebugLoc(); + if (!DLoc) + return std::error_code(); + + const FunctionSamples *FS = findFunctionSamples(Inst); + if (!FS) + return std::error_code(); + + const DILocation *DIL = DLoc; + unsigned Lineno = DLoc.getLine(); + unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine(); + + uint32_t LineOffset = getOffset(Lineno, HeaderLineno); + uint32_t Discriminator = DIL->getDiscriminator(); + ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator); + if (R) { + bool FirstMark = + CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get()); + if (FirstMark) { + const Function *F = Inst.getParent()->getParent(); + LLVMContext &Ctx = F->getContext(); + emitOptimizationRemark( + Ctx, DEBUG_TYPE, *F, DLoc, + Twine("Applied ") + Twine(*R) + " samples from profile (offset: " + + Twine(LineOffset) + + ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")"); + } + DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":" + << Inst << " (line offset: " << Lineno - HeaderLineno << "." 
+                 << DIL->getDiscriminator() << " - weight: " << R.get()
+                 << ")\n");
+  }
+  return R;
+}
+
+/// \brief Compute the weight of a basic block.
+///
+/// The weight of basic block \p BB is the maximum weight of all the
+/// instructions in BB.
+///
+/// \param BB The basic block to query.
+///
+/// \returns the weight for \p BB.
+ErrorOr<uint64_t>
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
+  bool Found = false;
+  uint64_t Weight = 0;
+  for (auto &I : BB->getInstList()) {
+    const ErrorOr<uint64_t> &R = getInstWeight(I);
+    if (R && R.get() >= Weight) {
+      Weight = R.get();
+      Found = true;
+    }
+  }
+  if (Found)
+    return Weight;
+  else
+    return std::error_code();
+}
+
+/// \brief Compute and store the weights of every basic block.
+///
+/// This populates the BlockWeights map by computing
+/// the weights of every basic block in the CFG.
+///
+/// \param F The function to query.
+bool SampleProfileLoader::computeBlockWeights(Function &F) {
+  bool Changed = false;
+  DEBUG(dbgs() << "Block weights\n");
+  for (const auto &BB : F) {
+    ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
+    if (Weight) {
+      BlockWeights[&BB] = Weight.get();
+      VisitedBlocks.insert(&BB);
+      Changed = true;
+    }
+    DEBUG(printBlockWeight(dbgs(), &BB));
+  }
+
+  return Changed;
+}
+
+/// \brief Get the FunctionSamples for a call instruction.
+///
+/// The FunctionSamples of a call instruction \p Inst is the inlined
+/// instance that the call instruction calls into. It contains all the
+/// samples that reside in that inlined instance. We first find the inlined
+/// instance the call instruction itself belongs to, then we traverse its
+/// children to find the callsite with the matching location and callee
+/// function name.
+///
+/// \param Inst Call instruction to query.
+///
+/// \returns The FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+  const DILocation *DIL = Inst.getDebugLoc();
+  if (!DIL) {
+    return nullptr;
+  }
+  DISubprogram *SP = DIL->getScope()->getSubprogram();
+  if (!SP)
+    return nullptr;
+
+  Function *CalleeFunc = Inst.getCalledFunction();
+  if (!CalleeFunc) {
+    return nullptr;
+  }
+
+  StringRef CalleeName = CalleeFunc->getName();
+  const FunctionSamples *FS = findFunctionSamples(Inst);
+  if (FS == nullptr)
+    return nullptr;
+
+  return FS->findFunctionSamplesAt(
+      CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+                       DIL->getDiscriminator(), CalleeName));
+}
+
+/// \brief Get the FunctionSamples for an instruction.
+///
+/// The FunctionSamples of an instruction \p Inst is the inlined instance
+/// that the instruction comes from. We traverse the inline stack of that
+/// instruction, and match it with the tree nodes in the profile.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the FunctionSamples pointer to the inlined instance.
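+///
+/// For example (hypothetical scenario): if bar was inlined into foo in the
+/// profiled binary, an instruction whose debug location reads "in bar,
+/// inlined at foo:10" resolves to the FunctionSamples node for that inlined
+/// copy of bar nested inside foo's profile, not to a top-level profile
+/// for bar.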
+const FunctionSamples * +SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { + SmallVector<CallsiteLocation, 10> S; + const DILocation *DIL = Inst.getDebugLoc(); + if (!DIL) { + return Samples; + } + StringRef CalleeName; + for (const DILocation *DIL = Inst.getDebugLoc(); DIL; + DIL = DIL->getInlinedAt()) { + DISubprogram *SP = DIL->getScope()->getSubprogram(); + if (!SP) + return nullptr; + if (!CalleeName.empty()) { + S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()), + DIL->getDiscriminator(), CalleeName)); + } + CalleeName = SP->getLinkageName(); + } + if (S.size() == 0) + return Samples; + const FunctionSamples *FS = Samples; + for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { + FS = FS->findFunctionSamplesAt(S[i]); + } + return FS; +} + +/// \brief Emit an inline hint if \p F is globally hot or cold. +/// +/// If \p F consumes a significant fraction of samples (indicated by +/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the +/// inliner to consider the function hot. +/// +/// If \p F consumes a small fraction of samples (indicated by +/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner +/// to consider the function cold. +/// +/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a +/// function globally hot or cold, we should be annotating individual callsites. +/// This is not currently possible, but work on the inliner will eventually +/// provide this ability. See http://reviews.llvm.org/D15003 for details and +/// discussion. +/// +/// \returns True if either attribute was applied to \p F. +bool SampleProfileLoader::emitInlineHints(Function &F) { + if (TotalCollectedSamples == 0) + return false; + + uint64_t FunctionSamples = Samples->getTotalSamples(); + double SamplesPercent = + (double)FunctionSamples / (double)TotalCollectedSamples * 100.0; + + // If the function collected more samples than the hot threshold, mark + // it globally hot. + if (SamplesPercent >= SampleProfileGlobalHotThreshold) { + F.addFnAttr(llvm::Attribute::InlineHint); + std::string Msg; + raw_string_ostream S(Msg); + S << "Applied inline hint to globally hot function '" << F.getName() + << "' with " << format("%.2f", SamplesPercent) + << "% of samples (threshold: " + << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)"; + S.flush(); + emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg); + return true; + } + + // If the function collected fewer samples than the cold threshold, mark + // it globally cold. + if (SamplesPercent <= SampleProfileGlobalColdThreshold) { + F.addFnAttr(llvm::Attribute::Cold); + std::string Msg; + raw_string_ostream S(Msg); + S << "Applied cold hint to globally cold function '" << F.getName() + << "' with " << format("%.2f", SamplesPercent) + << "% of samples (threshold: " + << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)"; + S.flush(); + emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg); + return true; + } + + return false; +} + +/// \brief Iteratively inline hot callsites of a function. +/// +/// Iteratively traverse all callsites of the function \p F, and find if +/// the corresponding inlined instance exists and is hot in profile. If +/// it is hot enough, inline the callsites and adds new callsites of the +/// callee into the caller. +/// +/// TODO: investigate the possibility of not invoking InlineFunction directly. +/// +/// \param F function to perform iterative inlining. 
+///
+/// \returns True if any inlining happened.
+bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+  bool Changed = false;
+  LLVMContext &Ctx = F.getContext();
+  while (true) {
+    bool LocalChanged = false;
+    SmallVector<CallInst *, 10> CIS;
+    for (auto &BB : F) {
+      for (auto &I : BB.getInstList()) {
+        CallInst *CI = dyn_cast<CallInst>(&I);
+        if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
+          CIS.push_back(CI);
+      }
+    }
+    for (auto CI : CIS) {
+      InlineFunctionInfo IFI;
+      Function *CalledFunction = CI->getCalledFunction();
+      DebugLoc DLoc = CI->getDebugLoc();
+      uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
+      if (InlineFunction(CI, IFI)) {
+        LocalChanged = true;
+        emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
+                               Twine("inlined hot callee '") +
+                                   CalledFunction->getName() + "' with " +
+                                   Twine(NumSamples) + " samples into '" +
+                                   F.getName() + "'");
+      }
+    }
+    if (LocalChanged) {
+      Changed = true;
+    } else {
+      break;
+    }
+  }
+  return Changed;
+}
+
+/// \brief Find equivalence classes for the given block.
+///
+/// This finds all the blocks that are guaranteed to execute the same
+/// number of times as \p BB1. To do this, it traverses all the
+/// descendants of \p BB1 in the dominator or post-dominator tree.
+///
+/// A block BB2 will be in the same equivalence class as \p BB1 if
+/// the following holds:
+///
+/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
+///    is a descendant of \p BB1 in the dominator tree, then BB2 should
+///    dominate BB1 in the post-dominator tree.
+///
+/// 2- Both BB2 and \p BB1 must be in the same loop.
+///
+/// For every block BB2 that meets those two requirements, we set BB2's
+/// equivalence class to \p BB1.
+///
+/// \param BB1 Block to check.
+/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
+/// \param DomTree Opposite dominator tree. If \p Descendants is filled
+///                with blocks from \p BB1's dominator tree, then
+///                this is the post-dominator tree, and vice versa.
+void SampleProfileLoader::findEquivalencesFor(
+    BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
+    DominatorTreeBase<BasicBlock> *DomTree) {
+  const BasicBlock *EC = EquivalenceClass[BB1];
+  uint64_t Weight = BlockWeights[EC];
+  for (const auto *BB2 : Descendants) {
+    bool IsDomParent = DomTree->dominates(BB2, BB1);
+    bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
+    if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
+      EquivalenceClass[BB2] = EC;
+
+      // If BB2 is heavier than BB1, raise the weight of BB1's equivalence
+      // class to BB2's weight.
+      //
+      // Note that we don't worry about the opposite situation here
+      // (when BB2 is lighter than BB1). We will deal with this
+      // during the propagation phase. Right now, we just want to
+      // make sure that BB1 has the largest weight of all the
+      // members of its equivalence set.
+      Weight = std::max(Weight, BlockWeights[BB2]);
+    }
+  }
+  BlockWeights[EC] = Weight;
+}
+
+/// \brief Find equivalence classes.
+///
+/// Since samples may be missing from blocks, we can fill in the gaps by setting
+/// the weights of all the blocks in the same equivalence class to the same
+/// weight. To compute the concept of equivalence, we use dominance and loop
+/// information. Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// \param F The function to query.
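+///
+/// For example, in a simple if-then-else diamond with no loops, the entry
+/// block dominates the join block and the join block post-dominates the
+/// entry block, so the two end up in the same equivalence class. Neither
+/// arm of the diamond joins that class, since the branch may bypass it.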
+void SampleProfileLoader::findEquivalenceClasses(Function &F) { + SmallVector<BasicBlock *, 8> DominatedBBs; + DEBUG(dbgs() << "\nBlock equivalence classes\n"); + // Find equivalence sets based on dominance and post-dominance information. + for (auto &BB : F) { + BasicBlock *BB1 = &BB; + + // Compute BB1's equivalence class once. + if (EquivalenceClass.count(BB1)) { + DEBUG(printBlockEquivalence(dbgs(), BB1)); + continue; + } + + // By default, blocks are in their own equivalence class. + EquivalenceClass[BB1] = BB1; + + // Traverse all the blocks dominated by BB1. We are looking for + // every basic block BB2 such that: + // + // 1- BB1 dominates BB2. + // 2- BB2 post-dominates BB1. + // 3- BB1 and BB2 are in the same loop nest. + // + // If all those conditions hold, it means that BB2 is executed + // as many times as BB1, so they are placed in the same equivalence + // class by making BB2's equivalence class be BB1. + DominatedBBs.clear(); + DT->getDescendants(BB1, DominatedBBs); + findEquivalencesFor(BB1, DominatedBBs, PDT.get()); + + DEBUG(printBlockEquivalence(dbgs(), BB1)); + } + + // Assign weights to equivalence classes. + // + // All the basic blocks in the same equivalence class will execute + // the same number of times. Since we know that the head block in + // each equivalence class has the largest weight, assign that weight + // to all the blocks in that equivalence class. + DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n"); + for (auto &BI : F) { + const BasicBlock *BB = &BI; + const BasicBlock *EquivBB = EquivalenceClass[BB]; + if (BB != EquivBB) + BlockWeights[BB] = BlockWeights[EquivBB]; + DEBUG(printBlockWeight(dbgs(), BB)); + } +} + +/// \brief Visit the given edge to decide if it has a valid weight. +/// +/// If \p E has not been visited before, we copy to \p UnknownEdge +/// and increment the count of unknown edges. +/// +/// \param E Edge to visit. +/// \param NumUnknownEdges Current number of unknown edges. +/// \param UnknownEdge Set if E has not been visited before. +/// +/// \returns E's weight, if known. Otherwise, return 0. +uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges, + Edge *UnknownEdge) { + if (!VisitedEdges.count(E)) { + (*NumUnknownEdges)++; + *UnknownEdge = E; + return 0; + } + + return EdgeWeights[E]; +} + +/// \brief Propagate weights through incoming/outgoing edges. +/// +/// If the weight of a basic block is known, and there is only one edge +/// with an unknown weight, we can calculate the weight of that edge. +/// +/// Similarly, if all the edges have a known count, we can calculate the +/// count of the basic block, if needed. +/// +/// \param F Function to process. +/// +/// \returns True if new weights were assigned to edges or blocks. +bool SampleProfileLoader::propagateThroughEdges(Function &F) { + bool Changed = false; + DEBUG(dbgs() << "\nPropagation through edges\n"); + for (const auto &BI : F) { + const BasicBlock *BB = &BI; + const BasicBlock *EC = EquivalenceClass[BB]; + + // Visit all the predecessor and successor edges to determine + // which ones have a weight assigned already. Note that it doesn't + // matter that we only keep track of a single unknown edge. The + // only case we are interested in handling is when only a single + // edge is unknown (see setEdgeOrBlockWeight). + for (unsigned i = 0; i < 2; i++) { + uint64_t TotalWeight = 0; + unsigned NumUnknownEdges = 0; + Edge UnknownEdge, SelfReferentialEdge; + + if (i == 0) { + // First, visit all predecessor edges. 
+        for (auto *Pred : Predecessors[BB]) {
+          Edge E = std::make_pair(Pred, BB);
+          TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+          if (E.first == E.second)
+            SelfReferentialEdge = E;
+        }
+      } else {
+        // On the second round, visit all successor edges.
+        for (auto *Succ : Successors[BB]) {
+          Edge E = std::make_pair(BB, Succ);
+          TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+        }
+      }
+
+      // After visiting all the edges, there are three cases that we
+      // can handle immediately:
+      //
+      // - All the edge weights are known (i.e., NumUnknownEdges == 0).
+      //   In this case, we simply check that the sum of all the edges
+      //   is the same as BB's weight. If not, we change BB's weight
+      //   to match. Additionally, if BB had not been visited before,
+      //   we mark it visited.
+      //
+      // - Only one edge is unknown and BB has already been visited.
+      //   In this case, we can compute the weight of the edge by
+      //   subtracting the sum of all the known edge weights from the
+      //   total block weight. If the known edges weigh more than BB,
+      //   then the weight of the last remaining edge is set to zero.
+      //
+      // - There exists a self-referential edge and the weight of BB is
+      //   known. In this case, this edge's weight can be derived from
+      //   BB's weight. We add up all the other known edges and set the
+      //   weight on the self-referential edge as we did in the previous
+      //   case.
+      //
+      // In any other case, we must continue iterating. Eventually,
+      // all edges will get a weight, or iteration will stop when
+      // it reaches SampleProfileMaxPropagateIterations.
+      if (NumUnknownEdges <= 1) {
+        uint64_t &BBWeight = BlockWeights[EC];
+        if (NumUnknownEdges == 0) {
+          // If we already know the weight of all edges, the weight of the
+          // basic block can be computed. It should be no larger than the sum
+          // of all edge weights.
+          if (TotalWeight > BBWeight) {
+            BBWeight = TotalWeight;
+            Changed = true;
+            DEBUG(dbgs() << "All edge weights for " << BB->getName()
+                         << " known. Set weight for block: ";
+                  printBlockWeight(dbgs(), BB););
+          }
+          if (VisitedBlocks.insert(EC).second)
+            Changed = true;
+        } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
+          // If there is a single unknown edge and the block has been
+          // visited, then we can compute E's weight.
+          if (BBWeight >= TotalWeight)
+            EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
+          else
+            EdgeWeights[UnknownEdge] = 0;
+          VisitedEdges.insert(UnknownEdge);
+          Changed = true;
+          DEBUG(dbgs() << "Set weight for edge: ";
+                printEdgeWeight(dbgs(), UnknownEdge));
+        }
+      } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
+        uint64_t &BBWeight = BlockWeights[BB];
+        // We have a self-referential edge and the weight of BB is known.
+        if (BBWeight >= TotalWeight)
+          EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
+        else
+          EdgeWeights[SelfReferentialEdge] = 0;
+        VisitedEdges.insert(SelfReferentialEdge);
+        Changed = true;
+        DEBUG(dbgs() << "Set self-referential edge weight to: ";
+              printEdgeWeight(dbgs(), SelfReferentialEdge));
+      }
+    }
+  }
+
+  return Changed;
+}
+
+/// \brief Build in/out edge lists for each basic block in the CFG.
+///
+/// We are interested in unique edges. If a block B1 has multiple
+/// edges to another block B2, we only add a single B1->B2 edge.
+void SampleProfileLoader::buildEdges(Function &F) {
+  for (auto &BI : F) {
+    BasicBlock *B1 = &BI;
+
+    // Add predecessors for B1.
+ SmallPtrSet<BasicBlock *, 16> Visited; + if (!Predecessors[B1].empty()) + llvm_unreachable("Found a stale predecessors list in a basic block."); + for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) { + BasicBlock *B2 = *PI; + if (Visited.insert(B2).second) + Predecessors[B1].push_back(B2); + } + + // Add successors for B1. + Visited.clear(); + if (!Successors[B1].empty()) + llvm_unreachable("Found a stale successors list in a basic block."); + for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) { + BasicBlock *B2 = *SI; + if (Visited.insert(B2).second) + Successors[B1].push_back(B2); + } + } +} + +/// \brief Propagate weights into edges +/// +/// The following rules are applied to every block BB in the CFG: +/// +/// - If BB has a single predecessor/successor, then the weight +/// of that edge is the weight of the block. +/// +/// - If all incoming or outgoing edges are known except one, and the +/// weight of the block is already known, the weight of the unknown +/// edge will be the weight of the block minus the sum of all the known +/// edges. If the sum of all the known edges is larger than BB's weight, +/// we set the unknown edge weight to zero. +/// +/// - If there is a self-referential edge, and the weight of the block is +/// known, the weight for that edge is set to the weight of the block +/// minus the weight of the other incoming edges to that block (if +/// known). +void SampleProfileLoader::propagateWeights(Function &F) { + bool Changed = true; + unsigned I = 0; + + // Add an entry count to the function using the samples gathered + // at the function entry. + F.setEntryCount(Samples->getHeadSamples()); + + // Before propagation starts, build, for each block, a list of + // unique predecessors and successors. This is necessary to handle + // identical edges in multiway branches. Since we visit all blocks and all + // edges of the CFG, it is cleaner to build these lists once at the start + // of the pass. + buildEdges(F); + + // Propagate until we converge or we go past the iteration limit. + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F); + } + + // Generate MD_prof metadata for every branch instruction using the + // edge weights computed during propagation. + DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n"); + LLVMContext &Ctx = F.getContext(); + MDBuilder MDB(Ctx); + for (auto &BI : F) { + BasicBlock *BB = &BI; + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 1) + continue; + if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) + continue; + + DEBUG(dbgs() << "\nGetting weights for branch at line " + << TI->getDebugLoc().getLine() << ".\n"); + SmallVector<uint32_t, 4> Weights; + uint32_t MaxWeight = 0; + DebugLoc MaxDestLoc; + for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { + BasicBlock *Succ = TI->getSuccessor(I); + Edge E = std::make_pair(BB, Succ); + uint64_t Weight = EdgeWeights[E]; + DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E)); + // Use uint32_t saturated arithmetic to adjust the incoming weights, + // if needed. Sample counts in profiles are 64-bit unsigned values, + // but internally branch weights are expressed as 32-bit values. 
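+      // For example, a raw edge weight of 2^35 is clamped to 4294967295
+      // (UINT32_MAX) before it is handed to MDBuilder::createBranchWeights.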
+      if (Weight > std::numeric_limits<uint32_t>::max()) {
+        DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+        Weight = std::numeric_limits<uint32_t>::max();
+      }
+      Weights.push_back(static_cast<uint32_t>(Weight));
+      if (Weight != 0) {
+        if (Weight > MaxWeight) {
+          MaxWeight = Weight;
+          MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+        }
+      }
+    }
+
+    // Only set weights if there is at least one non-zero weight.
+    // In any other case, let the analyzer set weights.
+    if (MaxWeight > 0) {
+      DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
+      TI->setMetadata(llvm::LLVMContext::MD_prof,
+                      MDB.createBranchWeights(Weights));
+      DebugLoc BranchLoc = TI->getDebugLoc();
+      emitOptimizationRemark(
+          Ctx, DEBUG_TYPE, F, MaxDestLoc,
+          Twine("most popular destination for conditional branches at ") +
+              ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
+                                   Twine(BranchLoc.getLine()) + ":" +
+                                   Twine(BranchLoc.getCol()))
+                           : Twine("<UNKNOWN LOCATION>")));
+    } else {
+      DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+    }
+  }
+}
+
+/// \brief Get the line number for the function header.
+///
+/// This looks up function \p F in the current compilation unit and
+/// retrieves the line number where the function is defined. This line
+/// becomes offset 0 for all the samples read from the profile file;
+/// every line number in the profile is relative to it.
+///
+/// \param F Function object to query.
+///
+/// \returns the line number where \p F is defined. If it returns 0,
+/// it means that there is no debug information available for \p F.
+unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
+  if (DISubprogram *S = getDISubprogram(&F))
+    return S->getLine();
+
+  // If the start of \p F is missing, emit a diagnostic to inform the user
+  // about the missed opportunity.
+  F.getContext().diagnose(DiagnosticInfoSampleProfile(
+      "No debug information found in function " + F.getName() +
+          ": Function profile not used",
+      DS_Warning));
+  return 0;
+}
+
+void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
+  DT.reset(new DominatorTree);
+  DT->recalculate(F);
+
+  PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+  PDT->recalculate(F);
+
+  LI.reset(new LoopInfo);
+  LI->analyze(*DT);
+}
+
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// Branch weights are computed out of instruction samples using a
+/// propagation heuristic. Propagation proceeds in 3 phases:
+///
+/// 1- Assignment of block weights. All the basic blocks in the function
+///    are initially assigned the same weight as their most frequently
+///    executed instruction.
+///
+/// 2- Creation of equivalence classes. Since samples may be missing from
+///    blocks, we can fill in the gaps by setting the weights of all the
+///    blocks in the same equivalence class to the same weight. To compute
+///    the concept of equivalence, we use dominance and loop information.
+///    Two blocks B1 and B2 are in the same equivalence class if B1
+///    dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// 3- Propagation of block weights into edges. This uses a simple
+///    propagation heuristic. The following rules are applied to every
+///    block BB in the CFG:
+///
+///    - If BB has a single predecessor/successor, then the weight
+///      of that edge is the weight of the block.
+///
+///    - If all the edges are known except one, and the weight of the
+///      block is already known, the weight of the unknown edge will
+///      be the weight of the block minus the sum of all the known
+///      edges. If the sum of all the known edges is larger than BB's
+///      weight, we set the unknown edge weight to zero.
+///
+///    - If there is a self-referential edge, and the weight of the block is
+///      known, the weight for that edge is set to the weight of the block
+///      minus the weight of the other incoming edges to that block (if
+///      known).
+///
+/// Since this propagation is not guaranteed to terminate for every CFG, we
+/// only allow it to proceed for a limited number of iterations (controlled
+/// by -sample-profile-max-propagate-iterations).
+///
+/// FIXME: Try to replace this propagation heuristic with a scheme
+/// that is guaranteed to terminate. A work-list approach similar to
+/// the standard value propagation algorithm used by SSA-CCP might
+/// work here.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on BB using the computed values for each of its branches.
+///
+/// \param F The function to query.
+///
+/// \returns true if \p F was modified. Returns false, otherwise.
+bool SampleProfileLoader::emitAnnotations(Function &F) {
+  bool Changed = false;
+
+  if (getFunctionLoc(F) == 0)
+    return false;
+
+  DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
+               << ": " << getFunctionLoc(F) << "\n");
+
+  Changed |= emitInlineHints(F);
+
+  Changed |= inlineHotFunctions(F);
+
+  // Compute basic block weights.
+  Changed |= computeBlockWeights(F);
+
+  if (Changed) {
+    // Compute dominance and loop info needed for propagation.
+    computeDominanceAndLoopInfo(F);
+
+    // Find equivalence classes.
+    findEquivalenceClasses(F);
+
+    // Propagate weights to all edges.
+    propagateWeights(F);
+  }
+
+  // If coverage checking was requested, compute it now.
+  if (SampleProfileRecordCoverage) {
+    unsigned Used = CoverageTracker.countUsedRecords(Samples);
+    unsigned Total = CoverageTracker.countBodyRecords(Samples);
+    unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+    if (Coverage < SampleProfileRecordCoverage) {
+      F.getContext().diagnose(DiagnosticInfoSampleProfile(
+          getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+          Twine(Used) + " of " + Twine(Total) +
+              " available profile records (" + Twine(Coverage) +
+              "%) were applied",
+          DS_Warning));
+    }
+  }
+
+  if (SampleProfileSampleCoverage) {
+    uint64_t Used = CoverageTracker.getTotalUsedSamples();
+    uint64_t Total = CoverageTracker.countBodySamples(Samples);
+    unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+    if (Coverage < SampleProfileSampleCoverage) {
+      F.getContext().diagnose(DiagnosticInfoSampleProfile(
+          getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+          Twine(Used) + " of " + Twine(Total) +
+              " available profile samples (" + Twine(Coverage) +
+              "%) were applied",
+          DS_Warning));
+    }
+  }
+  return Changed;
+}
+
+char SampleProfileLoader::ID = 0;
+INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
+                      "Sample Profile loader", false, false)
+INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
+INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
+                    "Sample Profile loader", false, false)
+
+bool SampleProfileLoader::doInitialization(Module &M) {
+  auto &Ctx = M.getContext();
+  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
+  if (std::error_code EC = ReaderOrErr.getError()) {
+    std::string Msg = "Could not open profile: " + EC.message();
+    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+    return false;
+  }
+  Reader = std::move(ReaderOrErr.get());
+  ProfileIsValid = (Reader->read() == sampleprof_error::success);
+  return true;
+}
+
+ModulePass *llvm::createSampleProfileLoaderPass() {
+  return new SampleProfileLoader(SampleProfileFile);
+}
+
+ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+  return new SampleProfileLoader(Name);
+}
+
+bool SampleProfileLoader::runOnModule(Module &M) {
+  if (!ProfileIsValid)
+    return false;
+
+  // Compute the total number of samples collected in this profile.
+  for (const auto &I : Reader->getProfiles())
+    TotalCollectedSamples += I.second.getTotalSamples();
+
+  bool retval = false;
+  for (auto &F : M)
+    if (!F.isDeclaration()) {
+      clearFunctionData();
+      retval |= runOnFunction(F);
+    }
+  return retval;
+}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+  Samples = Reader->getSamplesFor(F);
+  if (!Samples->empty())
+    return emitAnnotations(F);
+  return false;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 0000000..c94cc7c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,84 @@
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations, and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "strip-dead-prototypes"
+
+STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
+
+static bool stripDeadPrototypes(Module &M) {
+  bool MadeChange = false;
+
+  // Erase dead function prototypes.
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    Function *F = &*I++;
+    // Function must be a prototype and unused.
+    if (F->isDeclaration() && F->use_empty()) {
+      F->eraseFromParent();
+      ++NumDeadPrototypes;
+      MadeChange = true;
+    }
+  }
+
+  // Erase dead global var prototypes.
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ) {
+    GlobalVariable *GV = &*I++;
+    // Global must be a prototype and unused. Erasing it is a change, so
+    // record that fact as well.
+    if (GV->isDeclaration() && GV->use_empty()) {
+      GV->eraseFromParent();
+      MadeChange = true;
+    }
+  }
+
+  // Return an indication of whether we changed anything or not.
+ return MadeChange; +} + +PreservedAnalyses StripDeadPrototypesPass::run(Module &M) { + if (stripDeadPrototypes(M)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + +namespace { + +class StripDeadPrototypesLegacyPass : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + StripDeadPrototypesLegacyPass() : ModulePass(ID) { + initializeStripDeadPrototypesLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override { + return stripDeadPrototypes(M); + } +}; + +} // end anonymous namespace + +char StripDeadPrototypesLegacyPass::ID = 0; +INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes", + "Strip Unused Function Prototypes", false, false) + +ModulePass *llvm::createStripDeadPrototypesPass() { + return new StripDeadPrototypesLegacyPass(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp new file mode 100644 index 0000000..46f352f --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -0,0 +1,363 @@ +//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The StripSymbols transformation implements code stripping. Specifically, it +// can delete: +// +// * names for virtual registers +// * symbols for internal globals and functions +// * debug information +// +// Note that this transformation makes code much less readable, so it should +// only be used in situations where the 'strip' utility would be used, such as +// reducing code size or making it harder to reverse engineer code. 
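+//
+// For example, the passes defined here can be driven from opt (illustrative
+// invocations):
+//
+//   opt -strip foo.bc -o foo.stripped.bc
+//   opt -strip-dead-debug-info foo.bc -o foo.slim.bc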
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +namespace { + class StripSymbols : public ModulePass { + bool OnlyDebugInfo; + public: + static char ID; // Pass identification, replacement for typeid + explicit StripSymbols(bool ODI = false) + : ModulePass(ID), OnlyDebugInfo(ODI) { + initializeStripSymbolsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + }; + + class StripNonDebugSymbols : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripNonDebugSymbols() + : ModulePass(ID) { + initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + }; + + class StripDebugDeclare : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDebugDeclare() + : ModulePass(ID) { + initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + }; + + class StripDeadDebugInfo : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDeadDebugInfo() + : ModulePass(ID) { + initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + }; +} + +char StripSymbols::ID = 0; +INITIALIZE_PASS(StripSymbols, "strip", + "Strip all symbols from a module", false, false) + +ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { + return new StripSymbols(OnlyDebugInfo); +} + +char StripNonDebugSymbols::ID = 0; +INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug", + "Strip all symbols, except dbg symbols, from a module", + false, false) + +ModulePass *llvm::createStripNonDebugSymbolsPass() { + return new StripNonDebugSymbols(); +} + +char StripDebugDeclare::ID = 0; +INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare", + "Strip all llvm.dbg.declare intrinsics", false, false) + +ModulePass *llvm::createStripDebugDeclarePass() { + return new StripDebugDeclare(); +} + +char StripDeadDebugInfo::ID = 0; +INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info", + "Strip debug info for unused symbols", false, false) + +ModulePass *llvm::createStripDeadDebugInfoPass() { + return new StripDeadDebugInfo(); +} + +/// OnlyUsedBy - Return true if V is only used by Usr. 
+static bool OnlyUsedBy(Value *V, Value *Usr) { + for (User *U : V->users()) + if (U != Usr) + return false; + + return true; +} + +static void RemoveDeadConstant(Constant *C) { + assert(C->use_empty() && "Constant is not dead!"); + SmallPtrSet<Constant*, 4> Operands; + for (Value *Op : C->operands()) + if (OnlyUsedBy(Op, C)) + Operands.insert(cast<Constant>(Op)); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { + if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals. + GV->eraseFromParent(); + } + else if (!isa<Function>(C)) + if (isa<CompositeType>(C->getType())) + C->destroyConstant(); + + // If the constant referenced anything, see if we can delete it as well. + for (Constant *O : Operands) + RemoveDeadConstant(O); +} + +// Strip the symbol table of its names. +// +static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { + for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) { + Value *V = VI->getValue(); + ++VI; + if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) { + if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg")) + // Set name to "", removing from symbol table! + V->setName(""); + } + } +} + +// Strip any named types of their names. +static void StripTypeNames(Module &M, bool PreserveDbgInfo) { + TypeFinder StructTypes; + StructTypes.run(M, false); + + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *STy = StructTypes[i]; + if (STy->isLiteral() || STy->getName().empty()) continue; + + if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg")) + continue; + + STy->setName(""); + } +} + +/// Find values that are marked as llvm.used. +static void findUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSetImpl<const GlobalValue*> &UsedValues) { + if (!LLVMUsed) return; + UsedValues.insert(LLVMUsed); + + ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); +} + +/// StripSymbolNames - Strip symbol names. +static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { + + SmallPtrSet<const GlobalValue*, 8> llvmUsedValues; + findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues); + findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues); + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) + I->setName(""); // Internal symbols can't participate in linkage + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) + I->setName(""); // Internal symbols can't participate in linkage + StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); + } + + // Remove all names from types. 
+  StripTypeNames(M, PreserveDbgInfo);
+
+  return true;
+}
+
+bool StripSymbols::runOnModule(Module &M) {
+  bool Changed = false;
+  Changed |= StripDebugInfo(M);
+  if (!OnlyDebugInfo)
+    Changed |= StripSymbolNames(M, false);
+  return Changed;
+}
+
+bool StripNonDebugSymbols::runOnModule(Module &M) {
+  return StripSymbolNames(M, true);
+}
+
+bool StripDebugDeclare::runOnModule(Module &M) {
+
+  Function *Declare = M.getFunction("llvm.dbg.declare");
+  std::vector<Constant*> DeadConstants;
+
+  if (Declare) {
+    while (!Declare->use_empty()) {
+      CallInst *CI = cast<CallInst>(Declare->user_back());
+      Value *Arg1 = CI->getArgOperand(0);
+      Value *Arg2 = CI->getArgOperand(1);
+      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+      CI->eraseFromParent();
+      if (Arg1->use_empty()) {
+        if (Constant *C = dyn_cast<Constant>(Arg1))
+          DeadConstants.push_back(C);
+        else
+          RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+      }
+      if (Arg2->use_empty())
+        if (Constant *C = dyn_cast<Constant>(Arg2))
+          DeadConstants.push_back(C);
+    }
+    Declare->eraseFromParent();
+  }
+
+  while (!DeadConstants.empty()) {
+    Constant *C = DeadConstants.back();
+    DeadConstants.pop_back();
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+      if (GV->hasLocalLinkage())
+        RemoveDeadConstant(GV);
+    } else
+      RemoveDeadConstant(C);
+  }
+
+  return true;
+}
+
+/// Remove any debug info for global variables/functions in the given module
+/// for which said global variable/function no longer exists (i.e. is null).
+///
+/// Debugging information is encoded in LLVM IR using metadata. This is
+/// designed in such a way that debug info for a symbol is preserved even if
+/// the symbol is optimized away by the optimizer. This special pass removes
+/// debug info for such symbols.
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+  bool Changed = false;
+
+  LLVMContext &C = M.getContext();
+
+  // Find all the debug info in M. This is actually overkill in terms of what
+  // we want to do, but we want to try and be as resilient as possible in the
+  // face of potential debug info changes by using the formal interfaces given
+  // to us as much as possible.
+  DebugInfoFinder F;
+  F.processModule(M);
+
+  // For each compile unit, find the live set of global variables/functions and
+  // replace the current list of potentially dead global variables/functions
+  // with the live list.
+  SmallVector<Metadata *, 64> LiveGlobalVariables;
+  SmallVector<Metadata *, 64> LiveSubprograms;
+  DenseSet<const MDNode *> VisitedSet;
+
+  std::set<DISubprogram *> LiveSPs;
+  for (Function &F : M) {
+    if (DISubprogram *SP = F.getSubprogram())
+      LiveSPs.insert(SP);
+  }
+
+  for (DICompileUnit *DIC : F.compile_units()) {
+    // Create our live subprogram list.
+    bool SubprogramChange = false;
+    for (DISubprogram *DISP : DIC->getSubprograms()) {
+      // Make sure we visit each subprogram only once.
+      if (!VisitedSet.insert(DISP).second)
+        continue;
+
+      // If the function referenced by DISP is not null, the function is live.
+      if (LiveSPs.count(DISP))
+        LiveSubprograms.push_back(DISP);
+      else
+        SubprogramChange = true;
+    }
+
+    // Create our live global variable list.
+    bool GlobalVariableChange = false;
+    for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
+      // Make sure we visit each global variable only once.
+      if (!VisitedSet.insert(DIG).second)
+        continue;
+
+      // If the global variable referenced by DIG is not null, the global
+      // variable is live.
+      if (DIG->getVariable())
+        LiveGlobalVariables.push_back(DIG);
+      else
+        GlobalVariableChange = true;
+    }
+
+    // If we found dead subprograms or global variables, replace the current
+    // subprogram list/global variable list with our new live subprogram/global
+    // variable list.
+    if (SubprogramChange) {
+      DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
+      Changed = true;
+    }
+
+    if (GlobalVariableChange) {
+      DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
+      Changed = true;
+    }
+
+    // Reset lists for the next iteration.
+    LiveSubprograms.clear();
+    LiveGlobalVariables.clear();
+  }
+
+  return Changed;
+}