Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO')
25 files changed, 9600 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp new file mode 100644 index 0000000..89f213e --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -0,0 +1,887 @@ +//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass promotes "by reference" arguments to be "by value" arguments. In +// practice, this means looking for internal functions that have pointer +// arguments. If it can prove, through the use of alias analysis, that an +// argument is *only* loaded, then it can pass the value into the function +// instead of the address of the value. This can cause recursive simplification +// of code and lead to the elimination of allocas (especially in C++ template +// code like the STL). +// +// This pass also handles aggregate arguments that are passed into a function, +// scalarizing them if the elements of the aggregate are only loaded. Note that +// by default it refuses to scalarize aggregates which would require passing in +// more than three operands to the function, because passing thousands of +// operands for a large array or structure is unprofitable! This limit can be +// configured or disabled, however. +// +// Note that this transformation could also be done for arguments that are only +// stored to (returning the value instead), but does not currently. This case +// would be best handled when and if LLVM begins supporting multiple return +// values from functions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "argpromotion" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include <set> +using namespace llvm; + +STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted"); +STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); +STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted"); +STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated"); + +namespace { + /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. 
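As a standalone illustration (not part of the imported file), here is a rough C++-source analogue of the promotion described in the header comment above; the function and variable names are invented for the example, and real promotion of course happens on LLVM IR, not C++ source.

// Before: an internal function whose pointer argument is only ever loaded.
static int readOnlyByRef(const int *P) { return *P + 1; }
static int callByRef() {
  int Local = 41;
  return readOnlyByRef(&Local);      // the address of Local escapes into the call
}

// After promotion: the caller performs the load and passes the value directly,
// so later passes can delete the stack slot for Local entirely.
static int readOnlyByVal(int P) { return P + 1; }
static int callByVal() {
  int Local = 41;
  return readOnlyByVal(Local);
}

int main() { return callByRef() == callByVal() ? 0 : 1; }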
+ /// + struct ArgPromotion : public CallGraphSCCPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + CallGraphSCCPass::getAnalysisUsage(AU); + } + + virtual bool runOnSCC(CallGraphSCC &SCC); + static char ID; // Pass identification, replacement for typeid + explicit ArgPromotion(unsigned maxElements = 3) + : CallGraphSCCPass(&ID), maxElements(maxElements) {} + + /// A vector used to hold the indices of a single GEP instruction + typedef std::vector<uint64_t> IndicesVector; + + private: + CallGraphNode *PromoteArguments(CallGraphNode *CGN); + bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; + CallGraphNode *DoPromotion(Function *F, + SmallPtrSet<Argument*, 8> &ArgsToPromote, + SmallPtrSet<Argument*, 8> &ByValArgsToTransform); + /// The maximum number of elements to expand, or 0 for unlimited. + unsigned maxElements; + }; +} + +char ArgPromotion::ID = 0; +static RegisterPass<ArgPromotion> +X("argpromotion", "Promote 'by reference' arguments to scalars"); + +Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { + return new ArgPromotion(maxElements); +} + +bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { + bool Changed = false, LocalChange; + + do { // Iterate until we stop promoting from this SCC. + LocalChange = false; + // Attempt to promote arguments from all functions in this SCC. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + if (CallGraphNode *CGN = PromoteArguments(*I)) { + LocalChange = true; + SCC.ReplaceNode(*I, CGN); + } + } + Changed |= LocalChange; // Remember that we changed something. + } while (LocalChange); + + return Changed; +} + +/// PromoteArguments - This method checks the specified function to see if there +/// are any promotable arguments and if it is safe to promote the function (for +/// example, all callers are direct). If safe to promote some arguments, it +/// calls the DoPromotion method. +/// +CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { + Function *F = CGN->getFunction(); + + // Make sure that it is local to this module. + if (!F || !F->hasLocalLinkage()) return 0; + + // First check: see if there are any pointer arguments! If not, quick exit. + SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs; + unsigned ArgNo = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++ArgNo) + if (I->getType()->isPointerTy()) + PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo)); + if (PointerArgs.empty()) return 0; + + // Second check: make sure that all callers are direct callers. We can't + // transform functions that have indirect callers. + if (F->hasAddressTaken()) + return 0; + + // Check to see which arguments are promotable. If an argument is promotable, + // add it to ArgsToPromote. + SmallPtrSet<Argument*, 8> ArgsToPromote; + SmallPtrSet<Argument*, 8> ByValArgsToTransform; + for (unsigned i = 0; i != PointerArgs.size(); ++i) { + bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal); + + // If this is a byval argument, and if the aggregate type is small, just + // pass the elements, which is always safe. 
+ Argument *PtrArg = PointerArgs[i].first; + if (isByVal) { + const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); + if (const StructType *STy = dyn_cast<StructType>(AgTy)) { + if (maxElements > 0 && STy->getNumElements() > maxElements) { + DEBUG(dbgs() << "argpromotion disable promoting argument '" + << PtrArg->getName() << "' because it would require adding more" + << " than " << maxElements << " arguments to the function.\n"); + } else { + // If all the elements are single-value types, we can promote it. + bool AllSimple = true; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + if (!STy->getElementType(i)->isSingleValueType()) { + AllSimple = false; + break; + } + + // Safe to transform, don't even bother trying to "promote" it. + // Passing the elements as a scalar will allow scalarrepl to hack on + // the new alloca we introduce. + if (AllSimple) { + ByValArgsToTransform.insert(PtrArg); + continue; + } + } + } + } + + // Otherwise, see if we can promote the pointer to its value. + if (isSafeToPromoteArgument(PtrArg, isByVal)) + ArgsToPromote.insert(PtrArg); + } + + // No promotable pointer arguments. + if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) + return 0; + + return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); +} + +/// IsAlwaysValidPointer - Return true if the specified pointer is always legal +/// to load. +static bool IsAlwaysValidPointer(Value *V) { + if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true; + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) + return IsAlwaysValidPointer(GEP->getOperand(0)); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::GetElementPtr) + return IsAlwaysValidPointer(CE->getOperand(0)); + + return false; +} + +/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that +/// all callees pass in a valid pointer for the specified function argument. +static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) { + Function *Callee = Arg->getParent(); + + unsigned ArgNo = std::distance(Callee->arg_begin(), + Function::arg_iterator(Arg)); + + // Look at all call sites of the function. At this pointer we know we only + // have direct callees. + for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end(); + UI != E; ++UI) { + CallSite CS = CallSite::get(*UI); + assert(CS.getInstruction() && "Should only have direct calls!"); + + if (!IsAlwaysValidPointer(CS.getArgument(ArgNo))) + return false; + } + return true; +} + +/// Returns true if Prefix is a prefix of longer. That means, Longer has a size +/// that is greater than or equal to the size of prefix, and each of the +/// elements in Prefix is the same as the corresponding elements in Longer. +/// +/// This means it also returns true when Prefix and Longer are equal! +static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix, + const ArgPromotion::IndicesVector &Longer) { + if (Prefix.size() > Longer.size()) + return false; + for (unsigned i = 0, e = Prefix.size(); i != e; ++i) + if (Prefix[i] != Longer[i]) + return false; + return true; +} + + +/// Checks if Indices, or a prefix of Indices, is in Set. +static bool PrefixIn(const ArgPromotion::IndicesVector &Indices, + std::set<ArgPromotion::IndicesVector> &Set) { + std::set<ArgPromotion::IndicesVector>::iterator Low; + Low = Set.upper_bound(Indices); + if (Low != Set.begin()) + Low--; + // Low is now the last element smaller than or equal to Indices. 
This means + // it points to a prefix of Indices (possibly Indices itself), if such + // prefix exists. + // + // This load is safe if any prefix of its operands is safe to load. + return Low != Set.end() && IsPrefix(*Low, Indices); +} + +/// Mark the given indices (ToMark) as safe in the given set of indices +/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there +/// is already a prefix of Indices in Safe, Indices are implicitely marked safe +/// already. Furthermore, any indices that Indices is itself a prefix of, are +/// removed from Safe (since they are implicitely safe because of Indices now). +static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, + std::set<ArgPromotion::IndicesVector> &Safe) { + std::set<ArgPromotion::IndicesVector>::iterator Low; + Low = Safe.upper_bound(ToMark); + // Guard against the case where Safe is empty + if (Low != Safe.begin()) + Low--; + // Low is now the last element smaller than or equal to Indices. This + // means it points to a prefix of Indices (possibly Indices itself), if + // such prefix exists. + if (Low != Safe.end()) { + if (IsPrefix(*Low, ToMark)) + // If there is already a prefix of these indices (or exactly these + // indices) marked a safe, don't bother adding these indices + return; + + // Increment Low, so we can use it as a "insert before" hint + ++Low; + } + // Insert + Low = Safe.insert(Low, ToMark); + ++Low; + // If there we're a prefix of longer index list(s), remove those + std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end(); + while (Low != End && IsPrefix(ToMark, *Low)) { + std::set<ArgPromotion::IndicesVector>::iterator Remove = Low; + ++Low; + Safe.erase(Remove); + } +} + +/// isSafeToPromoteArgument - As you might guess from the name of this method, +/// it checks to see if it is both safe and useful to promote the argument. +/// This method limits promotion of aggregates to only promote up to three +/// elements of the aggregate in order to avoid exploding the number of +/// arguments passed in. +bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { + typedef std::set<IndicesVector> GEPIndicesSet; + + // Quick exit for unused arguments + if (Arg->use_empty()) + return true; + + // We can only promote this argument if all of the uses are loads, or are GEP + // instructions (with constant indices) that are subsequently loaded. + // + // Promoting the argument causes it to be loaded in the caller + // unconditionally. This is only safe if we can prove that either the load + // would have happened in the callee anyway (ie, there is a load in the entry + // block) or the pointer passed in at every call site is guaranteed to be + // valid. + // In the former case, invalid loads can happen, but would have happened + // anyway, in the latter case, invalid loads won't happen. This prevents us + // from introducing an invalid load that wouldn't have happened in the + // original code. + // + // This set will contain all sets of indices that are loaded in the entry + // block, and thus are safe to unconditionally load in the caller. + GEPIndicesSet SafeToUnconditionallyLoad; + + // This set contains all the sets of indices that we are planning to promote. + // This makes it possible to limit the number of arguments added. + GEPIndicesSet ToPromote; + + // If the pointer is always valid, any load with first index 0 is valid. 
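The prefix bookkeeping used by IsPrefix, PrefixIn and MarkIndicesSafe above is easiest to see in isolation. The following standalone sketch (an illustration, not the pass's own code; names are invented) shows why a load through GEP indices {0,2} counts as safe once the whole argument, index set {0}, is known to be loaded in the entry block.

#include <stdint.h>
#include <cassert>
#include <set>
#include <vector>

typedef std::vector<uint64_t> IndicesVector;

// Same convention as IsPrefix above: every sequence is a prefix of itself.
static bool IsPrefixOf(const IndicesVector &Prefix, const IndicesVector &Longer) {
  if (Prefix.size() > Longer.size())
    return false;
  for (unsigned i = 0, e = Prefix.size(); i != e; ++i)
    if (Prefix[i] != Longer[i])
      return false;
  return true;
}

int main() {
  std::set<IndicesVector> Safe;
  Safe.insert(IndicesVector(1, 0));   // {0}: the whole argument is loaded in the entry block

  IndicesVector Field;
  Field.push_back(0);
  Field.push_back(2);                 // {0,2}: a GEP into element 2 of the argument

  // Loading through {0,2} is unconditionally safe because one of its prefixes, {0},
  // is already in the safe set -- the check that PrefixIn performs above.
  assert(IsPrefixOf(*Safe.begin(), Field));
  return 0;
}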
+ if (isByVal || AllCalleesPassInValidPointerForArgument(Arg)) + SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); + + // First, iterate the entry block and mark loads of (geps of) arguments as + // safe. + BasicBlock *EntryBlock = Arg->getParent()->begin(); + // Declare this here so we can reuse it + IndicesVector Indices; + for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end(); + I != E; ++I) + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + Value *V = LI->getPointerOperand(); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { + V = GEP->getPointerOperand(); + if (V == Arg) { + // This load actually loads (part of) Arg? Check the indices then. + Indices.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + if (ConstantInt *CI = dyn_cast<ConstantInt>(*II)) + Indices.push_back(CI->getSExtValue()); + else + // We found a non-constant GEP index for this argument? Bail out + // right away, can't promote this argument at all. + return false; + + // Indices checked out, mark them as safe + MarkIndicesSafe(Indices, SafeToUnconditionallyLoad); + Indices.clear(); + } + } else if (V == Arg) { + // Direct loads are equivalent to a GEP with a single 0 index. + MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); + } + } + + // Now, iterate all uses of the argument to see if there are any uses that are + // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. + SmallVector<LoadInst*, 16> Loads; + IndicesVector Operands; + for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); + UI != E; ++UI) { + Operands.clear(); + if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (LI->isVolatile()) return false; // Don't hack volatile loads + Loads.push_back(LI); + // Direct loads are equivalent to a GEP with a zero index and then a load. + Operands.push_back(0); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + if (GEP->use_empty()) { + // Dead GEP's cause trouble later. Just remove them if we run into + // them. + getAnalysis<AliasAnalysis>().deleteValue(GEP); + GEP->eraseFromParent(); + // TODO: This runs the above loop over and over again for dead GEPS + // Couldn't we just do increment the UI iterator earlier and erase the + // use? + return isSafeToPromoteArgument(Arg, isByVal); + } + + // Ensure that all of the indices are constants. + for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); + i != e; ++i) + if (ConstantInt *C = dyn_cast<ConstantInt>(*i)) + Operands.push_back(C->getSExtValue()); + else + return false; // Not a constant operand GEP! + + // Ensure that the only users of the GEP are load instructions. + for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); + UI != E; ++UI) + if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (LI->isVolatile()) return false; // Don't hack volatile loads + Loads.push_back(LI); + } else { + // Other uses than load? + return false; + } + } else { + return false; // Not a load or a GEP. + } + + // Now, see if it is safe to promote this load / loads of this GEP. Loading + // is safe if Operands, or a prefix of Operands, is marked as safe. + if (!PrefixIn(Operands, SafeToUnconditionallyLoad)) + return false; + + // See if we are already promoting a load with these indices. If not, check + // to make sure that we aren't promoting too many elements. If so, nothing + // to do. 
+ if (ToPromote.find(Operands) == ToPromote.end()) { + if (maxElements > 0 && ToPromote.size() == maxElements) { + DEBUG(dbgs() << "argpromotion not promoting argument '" + << Arg->getName() << "' because it would require adding more " + << "than " << maxElements << " arguments to the function.\n"); + // We limit aggregate promotion to only promoting up to a fixed number + // of elements of the aggregate. + return false; + } + ToPromote.insert(Operands); + } + } + + if (Loads.empty()) return true; // No users, this is a dead argument. + + // Okay, now we know that the argument is only used by load instructions and + // it is safe to unconditionally perform all of them. Use alias analysis to + // check to see if the pointer is guaranteed to not be modified from entry of + // the function to each of the load instructions. + + // Because there could be several/many load instructions, remember which + // blocks we know to be transparent to the load. + SmallPtrSet<BasicBlock*, 16> TranspBlocks; + + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + if (!TD) return false; // Without TargetData, assume the worst. + + for (unsigned i = 0, e = Loads.size(); i != e; ++i) { + // Check to see if the load is invalidated from the start of the block to + // the load itself. + LoadInst *Load = Loads[i]; + BasicBlock *BB = Load->getParent(); + + const PointerType *LoadTy = + cast<PointerType>(Load->getPointerOperand()->getType()); + unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType()); + + if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) + return false; // Pointer is invalidated! + + // Now check every path from the entry block to the load for transparency. + // To do this, we perform a depth first search on the inverse CFG from the + // loading block. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > + I = idf_ext_begin(*PI, TranspBlocks), + E = idf_ext_end(*PI, TranspBlocks); I != E; ++I) + if (AA.canBasicBlockModify(**I, Arg, LoadSize)) + return false; + } + + // If the path from the entry of the function to each load is free of + // instructions that potentially invalidate the load, we can make the + // transformation! + return true; +} + +/// DoPromotion - This method actually performs the promotion of the specified +/// arguments, and returns the new function. At this point, we know that it's +/// safe to do so. +CallGraphNode *ArgPromotion::DoPromotion(Function *F, + SmallPtrSet<Argument*, 8> &ArgsToPromote, + SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { + + // Start by computing a new prototype for the function, which is the same as + // the old function, but has modified arguments. + const FunctionType *FTy = F->getFunctionType(); + std::vector<const Type*> Params; + + typedef std::set<IndicesVector> ScalarizeTable; + + // ScalarizedElements - If we are promoting a pointer that has elements + // accessed out of it, keep track of which elements are accessed so that we + // can add one argument for each. + // + // Arguments that are directly loaded will have a zero element value here, to + // handle cases where there are both a direct load and GEP accesses. 
+ // + std::map<Argument*, ScalarizeTable> ScalarizedElements; + + // OriginalLoads - Keep track of a representative load instruction from the + // original function so that we can tell the alias analysis implementation + // what the new GEP/Load instructions we are inserting look like. + std::map<IndicesVector, LoadInst*> OriginalLoads; + + // Attributes - Keep track of the parameter attributes for the arguments + // that we are *not* promoting. For the ones that we do promote, the parameter + // attributes are lost + SmallVector<AttributeWithIndex, 8> AttributesVec; + const AttrListPtr &PAL = F->getAttributes(); + + // Add any return attributes. + if (Attributes attrs = PAL.getRetAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); + + // First, determine the new argument list + unsigned ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; + ++I, ++ArgIndex) { + if (ByValArgsToTransform.count(I)) { + // Simple byval argument? Just add all the struct element types. + const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + const StructType *STy = cast<StructType>(AgTy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Params.push_back(STy->getElementType(i)); + ++NumByValArgsPromoted; + } else if (!ArgsToPromote.count(I)) { + // Unchanged argument + Params.push_back(I->getType()); + if (Attributes attrs = PAL.getParamAttributes(ArgIndex)) + AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs)); + } else if (I->use_empty()) { + // Dead argument (which are always marked as promotable) + ++NumArgumentsDead; + } else { + // Okay, this is being promoted. This means that the only uses are loads + // or GEPs which are only used by loads + + // In this table, we will track which indices are loaded from the argument + // (where direct loads are tracked as no indices). + ScalarizeTable &ArgIndices = ScalarizedElements[I]; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast<Instruction>(*UI); + assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User)); + IndicesVector Indices; + Indices.reserve(User->getNumOperands() - 1); + // Since loads will only have a single operand, and GEPs only a single + // non-index operand, this will record direct loads without any indices, + // and gep+loads with the GEP indices. + for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end(); + II != IE; ++II) + Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); + // GEPs with a single 0 index can be merged with direct loads + if (Indices.size() == 1 && Indices.front() == 0) + Indices.clear(); + ArgIndices.insert(Indices); + LoadInst *OrigLoad; + if (LoadInst *L = dyn_cast<LoadInst>(User)) + OrigLoad = L; + else + // Take any load, we will use it only to update Alias Analysis + OrigLoad = cast<LoadInst>(User->use_back()); + OriginalLoads[Indices] = OrigLoad; + } + + // Add a parameter to the function for each element passed in. + for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) { + // not allowed to dereference ->begin() if size() is 0 + Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), + SI->begin(), + SI->end())); + assert(Params.back()); + } + + if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) + ++NumArgumentsPromoted; + else + ++NumAggregatesPromoted; + } + } + + // Add any function attributes. 
+ if (Attributes attrs = PAL.getFnAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); + + const Type *RetTy = FTy->getReturnType(); + + // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which + // have zero fixed arguments. + bool ExtraArgHack = false; + if (Params.empty() && FTy->isVarArg()) { + ExtraArgHack = true; + Params.push_back(Type::getInt32Ty(F->getContext())); + } + + // Construct the new function type using the new arguments. + FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); + + // Create the new function body and insert it into the module. + Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); + NF->copyAttributesFrom(F); + + + DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" + << "From: " << *F); + + // Recompute the parameter attributes list based on the new arguments for + // the function. + NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end())); + AttributesVec.clear(); + + F->getParent()->getFunctionList().insert(F, NF); + NF->takeName(F); + + // Get the alias analysis information that we need to update to reflect our + // changes. + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + + // Get the callgraph information that we need to update to reflect our + // changes. + CallGraph &CG = getAnalysis<CallGraph>(); + + // Get a new callgraph node for NF. + CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + + + // Loop over all of the callers of the function, transforming the call sites + // to pass in the loaded pointers. + // + SmallVector<Value*, 16> Args; + while (!F->use_empty()) { + CallSite CS = CallSite::get(F->use_back()); + assert(CS.getCalledFunction() == F); + Instruction *Call = CS.getInstruction(); + const AttrListPtr &CallPAL = CS.getAttributes(); + + // Add any return attributes. + if (Attributes attrs = CallPAL.getRetAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); + + // Loop over the operands, inserting GEP and loads in the caller as + // appropriate. + CallSite::arg_iterator AI = CS.arg_begin(); + ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++AI, ++ArgIndex) + if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + Args.push_back(*AI); // Unmodified argument + + if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) + AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + + } else if (ByValArgsToTransform.count(I)) { + // Emit a GEP and load for each element of the struct. + const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + const StructType *STy = cast<StructType>(AgTy); + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, + (*AI)->getName()+"."+utostr(i), + Call); + // TODO: Tell AA about the new values? + Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); + } + } else if (!I->use_empty()) { + // Non-dead argument: insert GEPs and loads as appropriate. + ScalarizeTable &ArgIndices = ScalarizedElements[I]; + // Store the Value* version of the indices in here, but declare it now + // for reuse. 
+ std::vector<Value*> Ops; + for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) { + Value *V = *AI; + LoadInst *OrigLoad = OriginalLoads[*SI]; + if (!SI->empty()) { + Ops.reserve(SI->size()); + const Type *ElTy = V->getType(); + for (IndicesVector::const_iterator II = SI->begin(), + IE = SI->end(); II != IE; ++II) { + // Use i32 to index structs, and i64 for others (pointers/arrays). + // This satisfies GEP constraints. + const Type *IdxTy = (ElTy->isStructTy() ? + Type::getInt32Ty(F->getContext()) : + Type::getInt64Ty(F->getContext())); + Ops.push_back(ConstantInt::get(IdxTy, *II)); + // Keep track of the type we're currently indexing. + ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); + } + // And create a GEP to extract those indices. + V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), + V->getName()+".idx", Call); + Ops.clear(); + AA.copyValue(OrigLoad->getOperand(0), V); + } + // Since we're replacing a load make sure we take the alignment + // of the previous load. + LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); + newLoad->setAlignment(OrigLoad->getAlignment()); + Args.push_back(newLoad); + AA.copyValue(OrigLoad, Args.back()); + } + } + + if (ExtraArgHack) + Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); + + // Push any varargs arguments on the list. + for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + Args.push_back(*AI); + if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) + AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + } + + // Add any function attributes. + if (Attributes attrs = CallPAL.getFnAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end())); + } else { + New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end())); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + Args.clear(); + AttributesVec.clear(); + + // Update the alias analysis implementation to know that we are replacing + // the old call with a new one. + AA.replaceWithNewValue(Call, New); + + // Update the callgraph to know that the callsite has been transformed. + CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->replaceCallEdge(Call, New, NF_CGN); + + if (!Call->use_empty()) { + Call->replaceAllUsesWith(New); + New->takeName(Call); + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. 
+ // + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I) { + if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + // If this is an unmodified argument, move the name and users over to the + // new version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + AA.replaceWithNewValue(I, I2); + ++I2; + continue; + } + + if (ByValArgsToTransform.count(I)) { + // In the callee, we create an alloca, and store each of the new incoming + // arguments into the alloca. + Instruction *InsertPt = NF->begin()->begin(); + + // Just add all the struct element types. + const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); + const StructType *STy = cast<StructType>(AgTy); + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; + + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = + GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, + TheAlloca->getName()+"."+Twine(i), + InsertPt); + I2->setName(I->getName()+"."+Twine(i)); + new StoreInst(I2++, Idx, InsertPt); + } + + // Anything that used the arg should now use the alloca. + I->replaceAllUsesWith(TheAlloca); + TheAlloca->takeName(I); + AA.replaceWithNewValue(I, TheAlloca); + continue; + } + + if (I->use_empty()) { + AA.deleteValue(I); + continue; + } + + // Otherwise, if we promoted this argument, then all users are load + // instructions (or GEPs with only load users), and all loads should be + // using the new argument that we added. + ScalarizeTable &ArgIndices = ScalarizedElements[I]; + + while (!I->use_empty()) { + if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) { + assert(ArgIndices.begin()->empty() && + "Load element should sort to front!"); + I2->setName(I->getName()+".val"); + LI->replaceAllUsesWith(I2); + AA.replaceWithNewValue(LI, I2); + LI->eraseFromParent(); + DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() + << "' in function '" << F->getName() << "'\n"); + } else { + GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); + IndicesVector Operands; + Operands.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); + + // GEPs with a single 0 index can be merged with direct loads + if (Operands.size() == 1 && Operands.front() == 0) + Operands.clear(); + + Function::arg_iterator TheArg = I2; + for (ScalarizeTable::iterator It = ArgIndices.begin(); + *It != Operands; ++It, ++TheArg) { + assert(It != ArgIndices.end() && "GEP not handled??"); + } + + std::string NewName = I->getName(); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + NewName += "." + utostr(Operands[i]); + } + NewName += ".val"; + TheArg->setName(NewName); + + DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() + << "' of function '" << NF->getName() << "'\n"); + + // All of the uses must be load instructions. Replace them all with + // the argument specified by ArgNo. + while (!GEP->use_empty()) { + LoadInst *L = cast<LoadInst>(GEP->use_back()); + L->replaceAllUsesWith(TheArg); + AA.replaceWithNewValue(L, TheArg); + L->eraseFromParent(); + } + AA.deleteValue(GEP); + GEP->eraseFromParent(); + } + } + + // Increment I2 past all of the arguments added for this promoted pointer. 
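For the byval case handled above, a source-level picture (an illustration only; C++ pass-by-value is just an analogue of the IR byval attribute, and the names are invented) looks roughly like this: one scalar parameter per struct element, with the callee rebuilding a local aggregate when it still needs one, which corresponds to the new alloca inserted above.

struct Pair { int A; double B; };

// Before: the aggregate is passed "byval" (a copy of the whole struct).
static double sumByVal(Pair P) { return P.A + P.B; }

// After: one scalar parameter per element; the callee rebuilds the aggregate locally
// if it is still needed, mirroring the alloca-and-store sequence created above.
static double sumScalarized(int PA, double PB) {
  Pair Rebuilt = { PA, PB };
  return Rebuilt.A + Rebuilt.B;
}

int main() {
  Pair P = { 1, 2.5 };
  return sumByVal(P) == sumScalarized(P.A, P.B) ? 0 : 1;
}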
+ for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) + ++I2; + } + + // Notify the alias analysis implementation that we inserted a new argument. + if (ExtraArgHack) + AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), + NF->arg_begin()); + + + // Tell the alias analysis that the old function is about to disappear. + AA.replaceWithNewValue(F, NF); + + + NF_CGN->stealCalledFunctionsFrom(CG[F]); + + // Now that the old function is dead, delete it. If there is a dangling + // reference to the CallgraphNode, just leave the dead function around for + // someone else to nuke. + CallGraphNode *CGN = CG[F]; + if (CGN->getNumReferences() == 0) + delete CG.removeFunctionFromModule(CGN); + else + F->setLinkage(Function::ExternalLinkage); + + return NF_CGN; +} diff --git a/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt b/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt new file mode 100644 index 0000000..65483e8 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -0,0 +1,27 @@ +add_llvm_library(LLVMipo + ArgumentPromotion.cpp + ConstantMerge.cpp + DeadArgumentElimination.cpp + DeadTypeElimination.cpp + ExtractGV.cpp + FunctionAttrs.cpp + GlobalDCE.cpp + GlobalOpt.cpp + IPConstantPropagation.cpp + IPO.cpp + InlineAlways.cpp + InlineSimple.cpp + Inliner.cpp + Internalize.cpp + LoopExtractor.cpp + LowerSetJmp.cpp + MergeFunctions.cpp + PartialInlining.cpp + PartialSpecialization.cpp + PruneEH.cpp + StripDeadPrototypes.cpp + StripSymbols.cpp + StructRetPromotion.cpp + ) + +target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine) diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp new file mode 100644 index 0000000..3c05f88 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -0,0 +1,116 @@ +//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to a pass that merges duplicate global +// constants together into a single constant that is shared. This is useful +// because some passes (ie TraceValues) insert a lot of string constants into +// the program, regardless of whether or not an existing string is available. +// +// Algorithm: ConstantMerge is designed to build up a map of available constants +// and eliminate duplicates when it is initialized. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "constmerge" +#include "llvm/Transforms/IPO.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumMerged, "Number of global constants merged"); + +namespace { + struct ConstantMerge : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ConstantMerge() : ModulePass(&ID) {} + + // run - For this pass, process all of the globals in the module, + // eliminating duplicate constants. 
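The collect-then-replace structure of runOnModule below can be shown with a standalone sketch (an illustration, not the pass's code; strings stand in for constant initializers and ints for globals). The real pass defers the replacements so that rewriting uses cannot invalidate the Constant* keys in its map mid-scan.

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Three "globals" with their "initializers"; globals 0 and 2 are duplicates.
  const char *Init[] = { "hello", "world", "hello" };

  std::map<std::string, int> CMap;                 // initializer -> canonical global
  std::vector<std::pair<int, int> > Replacements;  // (duplicate, canonical)

  // First phase: only record which globals should be merged into which.
  for (int GV = 0; GV != 3; ++GV) {
    std::map<std::string, int>::iterator Slot = CMap.find(Init[GV]);
    if (Slot == CMap.end())
      CMap[Init[GV]] = GV;                         // first occurrence becomes canonical
    else
      Replacements.push_back(std::make_pair(GV, Slot->second));
  }

  // Second phase: perform the rewrites, mirroring the replacement loop below.
  for (size_t i = 0, e = Replacements.size(); i != e; ++i)
    std::printf("merge global %d into global %d\n",
                Replacements[i].first, Replacements[i].second);
  return 0;
}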
+ // + bool runOnModule(Module &M); + }; +} + +char ConstantMerge::ID = 0; +static RegisterPass<ConstantMerge> +X("constmerge", "Merge Duplicate Global Constants"); + +ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } + +bool ConstantMerge::runOnModule(Module &M) { + // Map unique constant/section pairs to globals. We don't want to merge + // globals in different sections. + DenseMap<Constant*, GlobalVariable*> CMap; + + // Replacements - This vector contains a list of replacements to perform. + SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; + + bool MadeChange = false; + + // Iterate constant merging while we are still making progress. Merging two + // constants together may allow us to merge other constants together if the + // second level constants have initializers which point to the globals that + // were just merged. + while (1) { + // First pass: identify all globals that can be merged together, filling in + // the Replacements vector. We cannot do the replacement in this pass + // because doing so may cause initializers of other globals to be rewritten, + // invalidating the Constant* pointers in CMap. + // + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = GVI++; + + // If this GV is dead, remove it. + GV->removeDeadConstantUsers(); + if (GV->use_empty() && GV->hasLocalLinkage()) { + GV->eraseFromParent(); + continue; + } + + // Only process constants with initializers in the default addres space. + if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty()) + continue; + + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *&Slot = CMap[Init]; + + if (Slot == 0) { // Nope, add it to the map. + Slot = GV; + } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); + } + } + + if (Replacements.empty()) + return MadeChange; + CMap.clear(); + + // Now that we have figured out which replacements must be made, do them all + // now. This avoid invalidating the pointers in CMap, which are unneeded + // now. + for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { + // Eliminate any uses of the dead global. + Replacements[i].first->replaceAllUsesWith(Replacements[i].second); + + // Delete the global value from the module. + Replacements[i].first->eraseFromParent(); + } + + NumMerged += Replacements.size(); + Replacements.clear(); + } +} diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp new file mode 100644 index 0000000..692e47d --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -0,0 +1,934 @@ +//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass deletes dead arguments from internal functions. Dead argument +// elimination removes arguments which are directly dead, as well as arguments +// only passed into function calls as dead arguments of other functions. 
This +// pass also deletes dead return values in a similar way. +// +// This pass is often useful as a cleanup pass to run after aggressive +// interprocedural passes, which add possibly-dead arguments or return values. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "deadargelim" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallingConv.h" +#include "llvm/Constant.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include <map> +#include <set> +using namespace llvm; + +STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); +STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); + +namespace { + /// DAE - The dead argument elimination pass. + /// + class DAE : public ModulePass { + public: + + /// Struct that represents (part of) either a return value or a function + /// argument. Used so that arguments and return values can be used + /// interchangably. + struct RetOrArg { + RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), + IsArg(IsArg) {} + const Function *F; + unsigned Idx; + bool IsArg; + + /// Make RetOrArg comparable, so we can put it into a map. + bool operator<(const RetOrArg &O) const { + if (F != O.F) + return F < O.F; + else if (Idx != O.Idx) + return Idx < O.Idx; + else + return IsArg < O.IsArg; + } + + /// Make RetOrArg comparable, so we can easily iterate the multimap. + bool operator==(const RetOrArg &O) const { + return F == O.F && Idx == O.Idx && IsArg == O.IsArg; + } + + std::string getDescription() const { + return std::string((IsArg ? "Argument #" : "Return value #")) + + utostr(Idx) + " of function " + F->getNameStr(); + } + }; + + /// Liveness enum - During our initial pass over the program, we determine + /// that things are either alive or maybe alive. We don't mark anything + /// explicitly dead (even if we know they are), since anything not alive + /// with no registered uses (in Uses) will never be marked alive and will + /// thus become dead in the end. + enum Liveness { Live, MaybeLive }; + + /// Convenience wrapper + RetOrArg CreateRet(const Function *F, unsigned Idx) { + return RetOrArg(F, Idx, false); + } + /// Convenience wrapper + RetOrArg CreateArg(const Function *F, unsigned Idx) { + return RetOrArg(F, Idx, true); + } + + typedef std::multimap<RetOrArg, RetOrArg> UseMap; + /// This maps a return value or argument to any MaybeLive return values or + /// arguments it uses. This allows the MaybeLive values to be marked live + /// when any of its users is marked live. + /// For example (indices are left out for clarity): + /// - Uses[ret F] = ret G + /// This means that F calls G, and F returns the value returned by G. + /// - Uses[arg F] = ret G + /// This means that some function calls G and passes its result as an + /// argument to F. + /// - Uses[ret F] = arg F + /// This means that F returns one of its own arguments. + /// - Uses[arg F] = arg G + /// This means that G calls F and passes one of its own (G's) arguments + /// directly to F. 
+ UseMap Uses; + + typedef std::set<RetOrArg> LiveSet; + typedef std::set<const Function*> LiveFuncSet; + + /// This set contains all values that have been determined to be live. + LiveSet LiveValues; + /// This set contains all values that are cannot be changed in any way. + LiveFuncSet LiveFunctions; + + typedef SmallVector<RetOrArg, 5> UseVector; + + public: + static char ID; // Pass identification, replacement for typeid + DAE() : ModulePass(&ID) {} + bool runOnModule(Module &M); + + virtual bool ShouldHackArguments() const { return false; } + + private: + Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses); + Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses, + unsigned RetValNum = 0); + Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); + + void SurveyFunction(const Function &F); + void MarkValue(const RetOrArg &RA, Liveness L, + const UseVector &MaybeLiveUses); + void MarkLive(const RetOrArg &RA); + void MarkLive(const Function &F); + void PropagateLiveness(const RetOrArg &RA); + bool RemoveDeadStuffFromFunction(Function *F); + bool DeleteDeadVarargs(Function &Fn); + }; +} + + +char DAE::ID = 0; +static RegisterPass<DAE> +X("deadargelim", "Dead Argument Elimination"); + +namespace { + /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but + /// deletes arguments to functions which are external. This is only for use + /// by bugpoint. + struct DAH : public DAE { + static char ID; + virtual bool ShouldHackArguments() const { return true; } + }; +} + +char DAH::ID = 0; +static RegisterPass<DAH> +Y("deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)"); + +/// createDeadArgEliminationPass - This pass removes arguments from functions +/// which are not used by the body of the function. +/// +ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); } +ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); } + +/// DeleteDeadVarargs - If this is an function that takes a ... list, and if +/// llvm.vastart is never called, the varargs list is dead for the function. +bool DAE::DeleteDeadVarargs(Function &Fn) { + assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!"); + if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false; + + // Ensure that the function is only directly called. + if (Fn.hasAddressTaken()) + return false; + + // Okay, we know we can transform this function if safe. Scan its body + // looking for calls to llvm.vastart. + for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::vastart) + return false; + } + } + } + + // If we get here, there are no calls to llvm.vastart in the function body, + // remove the "..." and adjust all the calls. + + // Start by computing a new prototype for the function, which is the same as + // the old function, but doesn't have isVarArg set. + const FunctionType *FTy = Fn.getFunctionType(); + + std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), + Params, false); + unsigned NumArgs = Params.size(); + + // Create the new function body and insert it into the module... 
+ Function *NF = Function::Create(NFTy, Fn.getLinkage()); + NF->copyAttributesFrom(&Fn); + Fn.getParent()->getFunctionList().insert(&Fn, NF); + NF->takeName(&Fn); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in a smaller number of arguments into the new function. + // + std::vector<Value*> Args; + while (!Fn.use_empty()) { + CallSite CS = CallSite::get(Fn.use_back()); + Instruction *Call = CS.getInstruction(); + + // Pass all the same arguments. + Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs); + + // Drop any attributes that were on the vararg arguments. + AttrListPtr PAL = CS.getAttributes(); + if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) { + SmallVector<AttributeWithIndex, 8> AttributesVec; + for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) + AttributesVec.push_back(PAL.getSlot(i)); + if (Attributes FnAttrs = PAL.getFnAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); + } + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + cast<InvokeInst>(New)->setAttributes(PAL); + } else { + New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + cast<CallInst>(New)->setAttributes(PAL); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + if (MDNode *N = Call->getDbgMetadata()) + New->setDbgMetadata(N); + + Args.clear(); + + if (!Call->use_empty()) + Call->replaceAllUsesWith(New); + + New->takeName(Call); + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. While we're at + // it, remove the dead arguments from the DeadArguments list. + // + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), + I2 = NF->arg_begin(); I != E; ++I, ++I2) { + // Move the name and users over to the new version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + } + + // Finally, nuke the old function. + Fn.eraseFromParent(); + return true; +} + +/// Convenience function that returns the number of return values. It returns 0 +/// for void functions and 1 for functions not returning a struct. It returns +/// the number of struct elements for functions returning a struct. +static unsigned NumRetVals(const Function *F) { + if (F->getReturnType()->isVoidTy()) + return 0; + else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) + return STy->getNumElements(); + else + return 1; +} + +/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not +/// live, it adds Use to the MaybeLiveUses argument. Returns the determined +/// liveness of Use. +DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) { + // We're live if our use or its Function is already marked as live. 
+ if (LiveFunctions.count(Use.F) || LiveValues.count(Use)) + return Live; + + // We're maybe live otherwise, but remember that we must become live if + // Use becomes live. + MaybeLiveUses.push_back(Use); + return MaybeLive; +} + + +/// SurveyUse - This looks at a single use of an argument or return value +/// and determines if it should be alive or not. Adds this use to MaybeLiveUses +/// if it causes the used value to become MaybeLive. +/// +/// RetValNum is the return value number to use when this use is used in a +/// return instruction. This is used in the recursion, you should always leave +/// it at 0. +DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, + UseVector &MaybeLiveUses, unsigned RetValNum) { + const User *V = *U; + if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) { + // The value is returned from a function. It's only live when the + // function's return value is live. We use RetValNum here, for the case + // that U is really a use of an insertvalue instruction that uses the + // orginal Use. + RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum); + // We might be live, depending on the liveness of Use. + return MarkIfNotLive(Use, MaybeLiveUses); + } + if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) { + if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex() + && IV->hasIndices()) + // The use we are examining is inserted into an aggregate. Our liveness + // depends on all uses of that aggregate, but if it is used as a return + // value, only index at which we were inserted counts. + RetValNum = *IV->idx_begin(); + + // Note that if we are used as the aggregate operand to the insertvalue, + // we don't change RetValNum, but do survey all our uses. + + Liveness Result = MaybeLive; + for (Value::const_use_iterator I = IV->use_begin(), + E = V->use_end(); I != E; ++I) { + Result = SurveyUse(I, MaybeLiveUses, RetValNum); + if (Result == Live) + break; + } + return Result; + } + + if (ImmutableCallSite CS = V) { + const Function *F = CS.getCalledFunction(); + if (F) { + // Used in a direct call. + + // Find the argument number. We know for sure that this use is an + // argument, since if it was the function argument this would be an + // indirect call and the we know can't be looking at a value of the + // label type (for the invoke instruction). + unsigned ArgNo = CS.getArgumentNo(U); + + if (ArgNo >= F->getFunctionType()->getNumParams()) + // The value is passed in through a vararg! Must be live. + return Live; + + assert(CS.getArgument(ArgNo) + == CS->getOperand(U.getOperandNo()) + && "Argument is not where we expected it"); + + // Value passed to a normal call. It's only live when the corresponding + // argument to the called function turns out live. + RetOrArg Use = CreateArg(F, ArgNo); + return MarkIfNotLive(Use, MaybeLiveUses); + } + } + // Used in any other way? Value must be live. + return Live; +} + +/// SurveyUses - This looks at all the uses of the given value +/// Returns the Liveness deduced from the uses of this value. +/// +/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If +/// the result is Live, MaybeLiveUses might be modified but its content should +/// be ignored (since it might not be complete). +DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) { + // Assume it's dead (which will only hold if there are no uses at all..). + Liveness Result = MaybeLive; + // Check each use. 
+ for (Value::const_use_iterator I = V->use_begin(), + E = V->use_end(); I != E; ++I) { + Result = SurveyUse(I, MaybeLiveUses); + if (Result == Live) + break; + } + return Result; +} + +// SurveyFunction - This performs the initial survey of the specified function, +// checking out whether or not it uses any of its incoming arguments or whether +// any callers use the return value. This fills in the LiveValues set and Uses +// map. +// +// We consider arguments of non-internal functions to be intrinsically alive as +// well as arguments to functions which have their "address taken". +// +void DAE::SurveyFunction(const Function &F) { + unsigned RetCount = NumRetVals(&F); + // Assume all return values are dead + typedef SmallVector<Liveness, 5> RetVals; + RetVals RetValLiveness(RetCount, MaybeLive); + + typedef SmallVector<UseVector, 5> RetUses; + // These vectors map each return value to the uses that make it MaybeLive, so + // we can add those to the Uses map if the return value really turns out to be + // MaybeLive. Initialized to a list of RetCount empty lists. + RetUses MaybeLiveRetUses(RetCount); + + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) + if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType() + != F.getFunctionType()->getReturnType()) { + // We don't support old style multiple return values. + MarkLive(F); + return; + } + + if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) { + MarkLive(F); + return; + } + + DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n"); + // Keep track of the number of live retvals, so we can skip checks once all + // of them turn out to be live. + unsigned NumLiveRetVals = 0; + const Type *STy = dyn_cast<StructType>(F.getReturnType()); + // Loop all uses of the function. + for (Value::const_use_iterator I = F.use_begin(), E = F.use_end(); + I != E; ++I) { + // If the function is PASSED IN as an argument, its address has been + // taken. + ImmutableCallSite CS(*I); + if (!CS || !CS.isCallee(I)) { + MarkLive(F); + return; + } + + // If this use is anything other than a call site, the function is alive. + const Instruction *TheCall = CS.getInstruction(); + if (!TheCall) { // Not a direct call site? + MarkLive(F); + return; + } + + // If we end up here, we are looking at a direct call to our function. + + // Now, check how our return value(s) is/are used in this caller. Don't + // bother checking return values if all of them are live already. + if (NumLiveRetVals != RetCount) { + if (STy) { + // Check all uses of the return value. + for (Value::const_use_iterator I = TheCall->use_begin(), + E = TheCall->use_end(); I != E; ++I) { + const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I); + if (Ext && Ext->hasIndices()) { + // This use uses a part of our return value, survey the uses of + // that part and store the results for this index only. + unsigned Idx = *Ext->idx_begin(); + if (RetValLiveness[Idx] != Live) { + RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]); + if (RetValLiveness[Idx] == Live) + NumLiveRetVals++; + } + } else { + // Used by something else than extractvalue. Mark all return + // values as live. 
+ for (unsigned i = 0; i != RetCount; ++i ) + RetValLiveness[i] = Live; + NumLiveRetVals = RetCount; + break; + } + } + } else { + // Single return value + RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]); + if (RetValLiveness[0] == Live) + NumLiveRetVals = RetCount; + } + } + } + + // Now we've inspected all callers, record the liveness of our return values. + for (unsigned i = 0; i != RetCount; ++i) + MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); + + DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n"); + + // Now, check all of our arguments. + unsigned i = 0; + UseVector MaybeLiveArgUses; + for (Function::const_arg_iterator AI = F.arg_begin(), + E = F.arg_end(); AI != E; ++AI, ++i) { + // See what the effect of this use is (recording any uses that cause + // MaybeLive in MaybeLiveArgUses). + Liveness Result = SurveyUses(AI, MaybeLiveArgUses); + // Mark the result. + MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses); + // Clear the vector again for the next iteration. + MaybeLiveArgUses.clear(); + } +} + +/// MarkValue - This function marks the liveness of RA depending on L. If L is +/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses, +/// such that RA will be marked live if any use in MaybeLiveUses gets marked +/// live later on. +void DAE::MarkValue(const RetOrArg &RA, Liveness L, + const UseVector &MaybeLiveUses) { + switch (L) { + case Live: MarkLive(RA); break; + case MaybeLive: + { + // Note any uses of this value, so this return value can be + // marked live whenever one of the uses becomes live. + for (UseVector::const_iterator UI = MaybeLiveUses.begin(), + UE = MaybeLiveUses.end(); UI != UE; ++UI) + Uses.insert(std::make_pair(*UI, RA)); + break; + } + } +} + +/// MarkLive - Mark the given Function as alive, meaning that it cannot be +/// changed in any way. Additionally, +/// mark any values that are used as this function's parameters or by its return +/// values (according to Uses) live as well. +void DAE::MarkLive(const Function &F) { + DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); + // Mark the function as live. + LiveFunctions.insert(&F); + // Mark all arguments as live. + for (unsigned i = 0, e = F.arg_size(); i != e; ++i) + PropagateLiveness(CreateArg(&F, i)); + // Mark all return values as live. + for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) + PropagateLiveness(CreateRet(&F, i)); +} + +/// MarkLive - Mark the given return value or argument as live. Additionally, +/// mark any values that are used by this value (according to Uses) live as +/// well. +void DAE::MarkLive(const RetOrArg &RA) { + if (LiveFunctions.count(RA.F)) + return; // Function was already marked Live. + + if (!LiveValues.insert(RA).second) + return; // We were already marked Live. + + DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n"); + PropagateLiveness(RA); +} + +/// PropagateLiveness - Given that RA is a live value, propagate it's liveness +/// to any other values it uses (according to Uses). +void DAE::PropagateLiveness(const RetOrArg &RA) { + // We don't use upper_bound (or equal_range) here, because our recursive call + // to ourselves is likely to cause the upper_bound (which is the first value + // not belonging to RA) to become erased and the iterator invalidated. 
+ UseMap::iterator Begin = Uses.lower_bound(RA); + UseMap::iterator E = Uses.end(); + UseMap::iterator I; + for (I = Begin; I != E && I->first == RA; ++I) + MarkLive(I->second); + + // Erase RA from the Uses map (from the lower bound to wherever we ended up + // after the loop). + Uses.erase(Begin, I); +} + +// RemoveDeadStuffFromFunction - Remove any arguments and return values from F +// that are not in LiveValues. Transform the function and all of the callees of +// the function to not have these arguments and return values. +// +bool DAE::RemoveDeadStuffFromFunction(Function *F) { + // Don't modify fully live functions + if (LiveFunctions.count(F)) + return false; + + // Start by computing a new prototype for the function, which is the same as + // the old function, but has fewer arguments and a different return type. + const FunctionType *FTy = F->getFunctionType(); + std::vector<const Type*> Params; + + // Set up to build a new list of parameter attributes. + SmallVector<AttributeWithIndex, 8> AttributesVec; + const AttrListPtr &PAL = F->getAttributes(); + + // The existing function return attributes. + Attributes RAttrs = PAL.getRetAttributes(); + Attributes FnAttrs = PAL.getFnAttributes(); + + // Find out the new return value. + + const Type *RetTy = FTy->getReturnType(); + const Type *NRetTy = NULL; + unsigned RetCount = NumRetVals(F); + + // -1 means unused, other numbers are the new index + SmallVector<int, 5> NewRetIdxs(RetCount, -1); + std::vector<const Type*> RetTypes; + if (RetTy->isVoidTy()) { + NRetTy = RetTy; + } else { + const StructType *STy = dyn_cast<StructType>(RetTy); + if (STy) + // Look at each of the original return values individually. + for (unsigned i = 0; i != RetCount; ++i) { + RetOrArg Ret = CreateRet(F, i); + if (LiveValues.erase(Ret)) { + RetTypes.push_back(STy->getElementType(i)); + NewRetIdxs[i] = RetTypes.size() - 1; + } else { + ++NumRetValsEliminated; + DEBUG(dbgs() << "DAE - Removing return value " << i << " from " + << F->getName() << "\n"); + } + } + else + // We used to return a single value. + if (LiveValues.erase(CreateRet(F, 0))) { + RetTypes.push_back(RetTy); + NewRetIdxs[0] = 0; + } else { + DEBUG(dbgs() << "DAE - Removing return value from " << F->getName() + << "\n"); + ++NumRetValsEliminated; + } + if (RetTypes.size() > 1) + // More than one return type? Return a struct with them. Also, if we used + // to return a struct and didn't change the number of return values, + // return a struct again. This prevents changing {something} into + // something and {} into void. + // Make the new struct packed if we used to return a packed struct + // already. + NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked()); + else if (RetTypes.size() == 1) + // One return type? Just a simple value then, but only if we didn't use to + // return a struct with that simple value before. + NRetTy = RetTypes.front(); + else if (RetTypes.size() == 0) + // No return types? Make it void, but only if we didn't use to return {}. + NRetTy = Type::getVoidTy(F->getContext()); + } + + assert(NRetTy && "No new return type found?"); + + // Remove any incompatible attributes, but only if we removed all return + // values. Otherwise, ensure that we don't have any conflicting attributes + // here. Currently, this should not be possible, but special handling might be + // required when new return value attributes are added. 
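+  // (For example, return attributes such as 'zeroext' or 'signext' only make
+  // sense on an integer return value, so they must be dropped once the return
+  // type becomes void.)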
+ if (NRetTy->isVoidTy()) + RAttrs &= ~Attribute::typeIncompatible(NRetTy); + else + assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 + && "Return attributes no longer compatible?"); + + if (RAttrs) + AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); + + // Remember which arguments are still alive. + SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false); + // Construct the new parameter list from non-dead arguments. Also construct + // a new set of parameter attributes to correspond. Skip the first parameter + // attribute, since that belongs to the return value. + unsigned i = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++i) { + RetOrArg Arg = CreateArg(F, i); + if (LiveValues.erase(Arg)) { + Params.push_back(I->getType()); + ArgAlive[i] = true; + + // Get the original parameter attributes (skipping the first one, that is + // for the return value. + if (Attributes Attrs = PAL.getParamAttributes(i + 1)) + AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs)); + } else { + ++NumArgumentsEliminated; + DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName() + << ") from " << F->getName() << "\n"); + } + } + + if (FnAttrs != Attribute::None) + AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + + // Reconstruct the AttributesList based on the vector we constructed. + AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end()); + + // Create the new function type based on the recomputed parameters. + FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); + + // No change? + if (NFTy == FTy) + return false; + + // Create the new function body and insert it into the module... + Function *NF = Function::Create(NFTy, F->getLinkage()); + NF->copyAttributesFrom(F); + NF->setAttributes(NewPAL); + // Insert the new function before the old function, so we won't be processing + // it again. + F->getParent()->getFunctionList().insert(F, NF); + NF->takeName(F); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in a smaller number of arguments into the new function. + // + std::vector<Value*> Args; + while (!F->use_empty()) { + CallSite CS = CallSite::get(F->use_back()); + Instruction *Call = CS.getInstruction(); + + AttributesVec.clear(); + const AttrListPtr &CallPAL = CS.getAttributes(); + + // The call return attributes. + Attributes RAttrs = CallPAL.getRetAttributes(); + Attributes FnAttrs = CallPAL.getFnAttributes(); + // Adjust in case the function was changed to return void. + RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType()); + if (RAttrs) + AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); + + // Declare these outside of the loops, so we can reuse them for the second + // loop, which loops the varargs. + CallSite::arg_iterator I = CS.arg_begin(); + unsigned i = 0; + // Loop over those operands, corresponding to the normal arguments to the + // original function, and add those that are still alive. + for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i) + if (ArgAlive[i]) { + Args.push_back(*I); + // Get original parameter attributes, but skip return attributes. + if (Attributes Attrs = CallPAL.getParamAttributes(i + 1)) + AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + } + + // Push any varargs arguments on the list. Don't forget their attributes. 
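+    // (As above, attribute slot 0 holds the return attributes, so the
+    // attributes of parameter i are looked up at index i + 1.)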
+ for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { + Args.push_back(*I); + if (Attributes Attrs = CallPAL.getParamAttributes(i + 1)) + AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + } + + if (FnAttrs != Attribute::None) + AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + + // Reconstruct the AttributesList based on the vector we constructed. + AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end()); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + cast<InvokeInst>(New)->setAttributes(NewCallPAL); + } else { + New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + cast<CallInst>(New)->setAttributes(NewCallPAL); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + if (MDNode *N = Call->getDbgMetadata()) + New->setDbgMetadata(N); + + Args.clear(); + + if (!Call->use_empty()) { + if (New->getType() == Call->getType()) { + // Return type not changed? Just replace users then. + Call->replaceAllUsesWith(New); + New->takeName(Call); + } else if (New->getType()->isVoidTy()) { + // Our return value has uses, but they will get removed later on. + // Replace by null for now. + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + } else { + assert(RetTy->isStructTy() && + "Return type changed, but not into a void. The old return type" + " must have been a struct!"); + Instruction *InsertPt = Call; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + BasicBlock::iterator IP = II->getNormalDest()->begin(); + while (isa<PHINode>(IP)) ++IP; + InsertPt = IP; + } + + // We used to return a struct. Instead of doing smart stuff with all the + // uses of this struct, we will just rebuild it using + // extract/insertvalue chaining and let instcombine clean that up. + // + // Start out building up our return value from undef + Value *RetVal = UndefValue::get(RetTy); + for (unsigned i = 0; i != RetCount; ++i) + if (NewRetIdxs[i] != -1) { + Value *V; + if (RetTypes.size() > 1) + // We are still returning a struct, so extract the value from our + // return value + V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret", + InsertPt); + else + // We are now returning a single element, so just insert that + V = New; + // Insert the value at the old position + RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt); + } + // Now, replace all uses of the old call instruction with the return + // struct we built + Call->replaceAllUsesWith(RetVal); + New->takeName(Call); + } + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. 
+ i = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I, ++i) + if (ArgAlive[i]) { + // If this is a live argument, move the name and users over to the new + // version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + ++I2; + } else { + // If this argument is dead, replace any uses of it with null constants + // (these are guaranteed to become unused later on). + I->replaceAllUsesWith(Constant::getNullValue(I->getType())); + } + + // If we change the return value of the function we must rewrite any return + // instructions. Check this now. + if (F->getReturnType() != NF->getReturnType()) + for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + Value *RetVal; + + if (NFTy->getReturnType()->isVoidTy()) { + RetVal = 0; + } else { + assert (RetTy->isStructTy()); + // The original return value was a struct, insert + // extractvalue/insertvalue chains to extract only the values we need + // to return and insert them into our new result. + // This does generate messy code, but we'll let it to instcombine to + // clean that up. + Value *OldRet = RI->getOperand(0); + // Start out building up our return value from undef + RetVal = UndefValue::get(NRetTy); + for (unsigned i = 0; i != RetCount; ++i) + if (NewRetIdxs[i] != -1) { + ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i, + "oldret", RI); + if (RetTypes.size() > 1) { + // We're still returning a struct, so reinsert the value into + // our new return value at the new index + + RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i], + "newret", RI); + } else { + // We are now only returning a simple value, so just return the + // extracted value. + RetVal = EV; + } + } + } + // Replace the return instruction with one returning the new return + // value (possibly 0 if we became void). + ReturnInst::Create(F->getContext(), RetVal, RI); + BB->getInstList().erase(RI); + } + + // Now that the old function is dead, delete it. + F->eraseFromParent(); + + return true; +} + +bool DAE::runOnModule(Module &M) { + bool Changed = false; + + // First pass: Do a simple check to see if any functions can have their "..." + // removed. We can do this if they never call va_start. This loop cannot be + // fused with the next loop, because deleting a function invalidates + // information computed while surveying other functions. + DEBUG(dbgs() << "DAE - Deleting dead varargs\n"); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + Function &F = *I++; + if (F.getFunctionType()->isVarArg()) + Changed |= DeleteDeadVarargs(F); + } + + // Second phase:loop through the module, determining which arguments are live. + // We assume all arguments are dead unless proven otherwise (allowing us to + // determine that dead arguments passed into recursive functions are dead). + // + DEBUG(dbgs() << "DAE - Determining liveness\n"); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + SurveyFunction(*I); + + // Now, remove all dead arguments and return values from each function in + // turn. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + // Increment now, because the function will probably get removed (ie. + // replaced by a new one). 
+ Function *F = I++; + Changed |= RemoveDeadStuffFromFunction(F); + } + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp new file mode 100644 index 0000000..662fbb5 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -0,0 +1,106 @@ +//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to cleanup the output of GCC. It eliminate names for types +// that are unused in the entire translation unit, using the FindUsedTypes pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "deadtypeelim" +#include "llvm/Transforms/IPO.h" +#include "llvm/Analysis/FindUsedTypes.h" +#include "llvm/Module.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumKilled, "Number of unused typenames removed from symtab"); + +namespace { + struct DTE : public ModulePass { + static char ID; // Pass identification, replacement for typeid + DTE() : ModulePass(&ID) {} + + // doPassInitialization - For this pass, it removes global symbol table + // entries for primitive types. These are never used for linking in GCC and + // they make the output uglier to look at, so we nuke them. + // + // Also, initialize instance variables. + // + bool runOnModule(Module &M); + + // getAnalysisUsage - This function needs FindUsedTypes to do its job... + // + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<FindUsedTypes>(); + } + }; +} + +char DTE::ID = 0; +static RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination"); + +ModulePass *llvm::createDeadTypeEliminationPass() { + return new DTE(); +} + + +// ShouldNukeSymtabEntry - Return true if this module level symbol table entry +// should be eliminated. +// +static inline bool ShouldNukeSymtabEntry(const Type *Ty){ + // Nuke all names for primitive types! + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) + return true; + + // Nuke all pointers to primitive types as well... + if (const PointerType *PT = dyn_cast<PointerType>(Ty)) + if (PT->getElementType()->isPrimitiveType() || + PT->getElementType()->isIntegerTy()) + return true; + + return false; +} + +// run - For this pass, it removes global symbol table entries for primitive +// types. These are never used for linking in GCC and they make the output +// uglier to look at, so we nuke them. Also eliminate types that are never used +// in the entire program as indicated by FindUsedTypes. +// +bool DTE::runOnModule(Module &M) { + bool Changed = false; + + TypeSymbolTable &ST = M.getTypeSymbolTable(); + std::set<const Type *> UsedTypes = getAnalysis<FindUsedTypes>().getTypes(); + + // Check the symbol table for superfluous type entries... + // + // Grab the 'type' plane of the module symbol... + TypeSymbolTable::iterator TI = ST.begin(); + TypeSymbolTable::iterator TE = ST.end(); + while ( TI != TE ) { + // If this entry should be unconditionally removed, or if we detect that + // the type is not used, remove it. 
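+    // Note that removal uses a post-incremented iterator (ST.remove(TI++)) so
+    // that TI stays valid after the entry is erased.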
+      const Type *RHS = TI->second;
+      if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) {
+        ST.remove(TI++);
+        ++NumKilled;
+        Changed = true;
+      } else {
+        ++TI;
+        // We only need to leave one name for each type.
+        UsedTypes.erase(RHS);
+      }
+    }
+
+  return Changed;
+}
+
+// vim: sw=2
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 0000000..7f67e48
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,175 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Constants.h"
+#include "llvm/Transforms/IPO.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+  /// @brief A pass to extract specific functions and their dependencies.
+  class GVExtractorPass : public ModulePass {
+    std::vector<GlobalValue*> Named;
+    bool deleteStuff;
+    bool reLink;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+
+    /// GVExtractorPass - If deleteS is true, this pass deletes the specified
+    /// global values. Otherwise, it deletes as much of the module as possible,
+    /// except for the global values specified.
+    ///
+    explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true,
+                             bool relinkCallees = false)
+      : ModulePass(&ID), Named(GVs), deleteStuff(deleteS),
+        reLink(relinkCallees) {}
+
+    bool runOnModule(Module &M) {
+      if (Named.size() == 0) {
+        return false;  // Nothing to extract
+      }
+
+
+      if (deleteStuff)
+        return deleteGV();
+      M.setModuleInlineAsm("");
+      return isolateGV(M);
+    }
+
+    bool deleteGV() {
+      for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
+             GE = Named.end(); GI != GE; ++GI) {
+        if (Function* NamedFunc = dyn_cast<Function>(*GI)) {
+          // If we're in relinking mode, set the linkage of all internal callees
+          // to external. This allows us to extract the function and then link
+          // everything back together afterwards.
+          if (reLink) {
+            for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end();
+                 B != BE; ++B) {
+              for (BasicBlock::iterator I = B->begin(), E = B->end();
+                   I != E; ++I) {
+                if (CallInst* callInst = dyn_cast<CallInst>(&*I)) {
+                  Function* Callee = callInst->getCalledFunction();
+                  if (Callee && Callee->hasLocalLinkage())
+                    Callee->setLinkage(GlobalValue::ExternalLinkage);
+                }
+              }
+            }
+          }
+
+          NamedFunc->setLinkage(GlobalValue::ExternalLinkage);
+          NamedFunc->deleteBody();
+          assert(NamedFunc->isDeclaration() && "This didn't make the function external!");
+        } else {
+          if (!(*GI)->isDeclaration()) {
+            cast<GlobalVariable>(*GI)->setInitializer(0);  // Clear the initializer.
+            (*GI)->setLinkage(GlobalValue::ExternalLinkage);
+          }
+        }
+      }
+      return true;
+    }
+
+    bool isolateGV(Module &M) {
+      // Mark all globals internal
+      // FIXME: what should we do with private linkage?
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) + if (!I->isDeclaration()) { + I->setLinkage(GlobalValue::InternalLinkage); + } + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) { + I->setLinkage(GlobalValue::InternalLinkage); + } + + // Make sure our result is globally accessible... + // by putting them in the used array + { + std::vector<Constant *> AUGs; + const Type *SBP= + Type::getInt8PtrTy(M.getContext()); + for (std::vector<GlobalValue*>::iterator GI = Named.begin(), + GE = Named.end(); GI != GE; ++GI) { + (*GI)->setLinkage(GlobalValue::ExternalLinkage); + AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP)); + } + ArrayType *AT = ArrayType::get(SBP, AUGs.size()); + Constant *Init = ConstantArray::get(AT, AUGs); + GlobalValue *gv = new GlobalVariable(M, AT, false, + GlobalValue::AppendingLinkage, + Init, "llvm.used"); + gv->setSection("llvm.metadata"); + } + + // All of the functions may be used by global variables or the named + // globals. Loop through them and create a new, external functions that + // can be "used", instead of ones with bodies. + std::vector<Function*> NewFunctions; + + Function *Last = --M.end(); // Figure out where the last real fn is. + + for (Module::iterator I = M.begin(); ; ++I) { + if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) { + Function *New = Function::Create(I->getFunctionType(), + GlobalValue::ExternalLinkage); + New->copyAttributesFrom(I); + + // If it's not the named function, delete the body of the function + I->dropAllReferences(); + + M.getFunctionList().push_back(New); + NewFunctions.push_back(New); + New->takeName(I); + } + + if (&*I == Last) break; // Stop after processing the last function + } + + // Now that we have replacements all set up, loop through the module, + // deleting the old functions, replacing them with the newly created + // functions. + if (!NewFunctions.empty()) { + unsigned FuncNum = 0; + Module::iterator I = M.begin(); + do { + if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) { + // Make everything that uses the old function use the new dummy fn + I->replaceAllUsesWith(NewFunctions[FuncNum++]); + + Function *Old = I; + ++I; // Move the iterator to the new function + + // Delete the old function! + M.getFunctionList().erase(Old); + + } else { + ++I; // Skip the function we are extracting + } + } while (&*I != NewFunctions[0]); + } + + return true; + } + }; + + char GVExtractorPass::ID = 0; +} + +ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs, + bool deleteFn, bool relinkCallees) { + return new GVExtractorPass(GVs, deleteFn, relinkCallees); +} diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp new file mode 100644 index 0000000..9bd7af6 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -0,0 +1,391 @@ +//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple interprocedural pass which walks the +// call-graph, looking for functions which do not access or only read +// non-local memory, and marking them readnone/readonly. 
In addition, +// it marks function arguments (of pointer type) 'nocapture' if a call +// to the function does not create any copies of the pointer value that +// outlive the call. This more or less means that the pointer is only +// dereferenced, and not returned from the function or stored in a global. +// This pass is implemented as a bottom-up traversal of the call-graph. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "functionattrs" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/InstIterator.h" +using namespace llvm; + +STATISTIC(NumReadNone, "Number of functions marked readnone"); +STATISTIC(NumReadOnly, "Number of functions marked readonly"); +STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); +STATISTIC(NumNoAlias, "Number of function returns marked noalias"); + +namespace { + struct FunctionAttrs : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + FunctionAttrs() : CallGraphSCCPass(&ID) {} + + // runOnSCC - Analyze the SCC, performing the transformation if possible. + bool runOnSCC(CallGraphSCC &SCC); + + // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. + bool AddReadAttrs(const CallGraphSCC &SCC); + + // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. + bool AddNoCaptureAttrs(const CallGraphSCC &SCC); + + // IsFunctionMallocLike - Does this function allocate new memory? + bool IsFunctionMallocLike(Function *F, + SmallPtrSet<Function*, 8> &) const; + + // AddNoAliasAttrs - Deduce noalias attributes for the SCC. + bool AddNoAliasAttrs(const CallGraphSCC &SCC); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + CallGraphSCCPass::getAnalysisUsage(AU); + } + + bool PointsToLocalMemory(Value *V); + }; +} + +char FunctionAttrs::ID = 0; +static RegisterPass<FunctionAttrs> +X("functionattrs", "Deduce function attributes"); + +Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } + + +/// PointsToLocalMemory - Returns whether the given pointer value points to +/// memory that is local to the function. Global constants are considered +/// local to all functions. +bool FunctionAttrs::PointsToLocalMemory(Value *V) { + SmallVector<Value*, 16> Worklist; + unsigned MaxLookup = 8; + + Worklist.push_back(V); + + do { + V = Worklist.pop_back_val()->getUnderlyingObject(); + + // An alloca instruction defines local memory. + if (isa<AllocaInst>(V)) + continue; + + // A global constant counts as local memory for our purposes. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (!GV->isConstant()) + return false; + continue; + } + + // If both select values point to local memory, then so does the select. + if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + // If all values incoming to a phi node point to local memory, then so does + // the phi. + if (PHINode *PN = dyn_cast<PHINode>(V)) { + // Don't bother inspecting phi nodes with many operands. 
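+      // (MaxLookup serves both as the overall lookup budget for the worklist
+      // and as the cap on how wide a phi we are willing to examine.)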
+ if (PN->getNumIncomingValues() > MaxLookup) + return false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + + return false; + } while (!Worklist.empty() && --MaxLookup); + + return Worklist.empty(); +} + +/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. +bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { + SmallPtrSet<Function*, 8> SCCNodes; + + // Fill SCCNodes with the elements of the SCC. Used for quickly + // looking up whether a given CallGraphNode is in this SCC. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert((*I)->getFunction()); + + // Check if any of the functions in the SCC read or write memory. If they + // write memory then they can't be marked readnone or readonly. + bool ReadsMemory = false; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + + if (F == 0) + // External node - may write memory. Just give up. + return false; + + if (F->doesNotAccessMemory()) + // Already perfect! + continue; + + // Definitions with weak linkage may be overridden at linktime with + // something that writes memory, so treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) { + if (!F->onlyReadsMemory()) + // May write memory. Just give up. + return false; + + ReadsMemory = true; + continue; + } + + // Scan the function body for instructions that may read or write memory. + for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { + Instruction *I = &*II; + + // Some instructions can be ignored even if they read or write memory. + // Detect these now, skipping to the next instruction if one is found. + CallSite CS = CallSite::get(I); + if (CS.getInstruction() && CS.getCalledFunction()) { + // Ignore calls to functions in the same SCC. + if (SCCNodes.count(CS.getCalledFunction())) + continue; + // Ignore intrinsics that only access local memory. + if (unsigned id = CS.getCalledFunction()->getIntrinsicID()) + if (AliasAnalysis::getModRefBehavior(id) == + AliasAnalysis::AccessesArguments) { + // Check that all pointer arguments point to local memory. + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI) { + Value *Arg = *CI; + if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg)) + // Writes memory. Just give up. + return false; + } + // Only reads and writes local memory. + continue; + } + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore loads from local memory. + if (PointsToLocalMemory(LI->getPointerOperand())) + continue; + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Ignore stores to local memory. + if (PointsToLocalMemory(SI->getPointerOperand())) + continue; + } + + // Any remaining instructions need to be taken seriously! Check if they + // read or write memory. + if (I->mayWriteToMemory()) + // Writes memory. Just give up. + return false; + + if (isMalloc(I)) + // malloc claims not to write memory! PR3754. + return false; + + // If this instruction may read memory, remember that. + ReadsMemory |= I->mayReadFromMemory(); + } + } + + // Success! Functions in this SCC do not access memory, or only read memory. + // Give them the appropriate attribute. + bool MadeChange = false; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + + if (F->doesNotAccessMemory()) + // Already perfect! 
+ continue; + + if (F->onlyReadsMemory() && ReadsMemory) + // No change. + continue; + + MadeChange = true; + + // Clear out any existing attributes. + F->removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone); + + // Add in the new attribute. + F->addAttribute(~0, ReadsMemory? Attribute::ReadOnly : Attribute::ReadNone); + + if (ReadsMemory) + ++NumReadOnly; + else + ++NumReadNone; + } + + return MadeChange; +} + +/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. +bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { + bool Changed = false; + + // Check each function in turn, determining which pointer arguments are not + // captured. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + + if (F == 0) + // External node - skip it; + continue; + + // Definitions with weak linkage may be overridden at linktime with + // something that writes memory, so treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + continue; + + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A) + if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() && + !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) { + A->addAttr(Attribute::NoCapture); + ++NumNoCapture; + Changed = true; + } + } + + return Changed; +} + +/// IsFunctionMallocLike - A function is malloc-like if it returns either null +/// or a pointer that doesn't alias any other pointer visible to the caller. +bool FunctionAttrs::IsFunctionMallocLike(Function *F, + SmallPtrSet<Function*, 8> &SCCNodes) const { + UniqueVector<Value *> FlowsToReturn; + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) + if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); + + for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { + Value *RetVal = FlowsToReturn[i+1]; // UniqueVector[0] is reserved. + + if (Constant *C = dyn_cast<Constant>(RetVal)) { + if (!C->isNullValue() && !isa<UndefValue>(C)) + return false; + + continue; + } + + if (isa<Argument>(RetVal)) + return false; + + if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) + switch (RVI->getOpcode()) { + // Extend the analysis by looking upwards. + case Instruction::BitCast: + case Instruction::GetElementPtr: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(RVI); + for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + FlowsToReturn.insert(PN->getIncomingValue(i)); + continue; + } + + // Check whether the pointer came from an allocation. + case Instruction::Alloca: + break; + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + if (CS.paramHasAttr(0, Attribute::NoAlias)) + break; + if (CS.getCalledFunction() && + SCCNodes.count(CS.getCalledFunction())) + break; + } // fall-through + default: + return false; // Did not come from an allocation. + } + + if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) + return false; + } + + return true; +} + +/// AddNoAliasAttrs - Deduce noalias attributes for the SCC. +bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { + SmallPtrSet<Function*, 8> SCCNodes; + + // Fill SCCNodes with the elements of the SCC. 
Used for quickly + // looking up whether a given CallGraphNode is in this SCC. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert((*I)->getFunction()); + + // Check each function in turn, determining which functions return noalias + // pointers. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + + if (F == 0) + // External node - skip it; + return false; + + // Already noalias. + if (F->doesNotAlias(0)) + continue; + + // Definitions with weak linkage may be overridden at linktime, so + // treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + return false; + + // We annotate noalias return values, which are only applicable to + // pointer types. + if (!F->getReturnType()->isPointerTy()) + continue; + + if (!IsFunctionMallocLike(F, SCCNodes)) + return false; + } + + bool MadeChange = false; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) + continue; + + F->setDoesNotAlias(0); + ++NumNoAlias; + MadeChange = true; + } + + return MadeChange; +} + +bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { + bool Changed = AddReadAttrs(SCC); + Changed |= AddNoCaptureAttrs(SCC); + Changed |= AddNoAliasAttrs(SCC); + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp new file mode 100644 index 0000000..44216a6 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -0,0 +1,208 @@ +//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transform is designed to eliminate unreachable internal globals from the +// program. It uses an aggressive algorithm, searching out globals that are +// known to be alive. After it finds all of the globals which are needed, it +// deletes whatever is left over. This allows it to delete recursive chunks of +// the program which are unreachable. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globaldce" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumAliases , "Number of global aliases removed"); +STATISTIC(NumFunctions, "Number of functions removed"); +STATISTIC(NumVariables, "Number of global variables removed"); + +namespace { + struct GlobalDCE : public ModulePass { + static char ID; // Pass identification, replacement for typeid + GlobalDCE() : ModulePass(&ID) {} + + // run - Do the GlobalDCE pass on the specified module, optionally updating + // the specified callgraph to reflect the changes. + // + bool runOnModule(Module &M); + + private: + SmallPtrSet<GlobalValue*, 32> AliveGlobals; + + /// GlobalIsNeeded - mark the specific global value as needed, and + /// recursively mark anything that it uses as also needed. 
+ void GlobalIsNeeded(GlobalValue *GV); + void MarkUsedGlobalsAsNeeded(Constant *C); + + bool RemoveUnusedGlobalValue(GlobalValue &GV); + }; +} + +char GlobalDCE::ID = 0; +static RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination"); + +ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } + +bool GlobalDCE::runOnModule(Module &M) { + bool Changed = false; + + // Loop over the module, adding globals which are obviously necessary. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Changed |= RemoveUnusedGlobalValue(*I); + // Functions with external linkage are needed if they have a body + if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() && + !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) + GlobalIsNeeded(I); + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + Changed |= RemoveUnusedGlobalValue(*I); + // Externally visible & appending globals are needed, if they have an + // initializer. + if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() && + !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) + GlobalIsNeeded(I); + } + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E; ++I) { + Changed |= RemoveUnusedGlobalValue(*I); + // Externally visible aliases are needed. + if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage()) + GlobalIsNeeded(I); + } + + // Now that all globals which are needed are in the AliveGlobals set, we loop + // through the program, deleting those which are not alive. + // + + // The first pass is to drop initializers of global variables which are dead. + std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (!AliveGlobals.count(I)) { + DeadGlobalVars.push_back(I); // Keep track of dead globals + I->setInitializer(0); + } + + // The second pass drops the bodies of functions which are dead... + std::vector<Function*> DeadFunctions; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!AliveGlobals.count(I)) { + DeadFunctions.push_back(I); // Keep track of dead globals + if (!I->isDeclaration()) + I->deleteBody(); + } + + // The third pass drops targets of aliases which are dead... + std::vector<GlobalAlias*> DeadAliases; + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; + ++I) + if (!AliveGlobals.count(I)) { + DeadAliases.push_back(I); + I->setAliasee(0); + } + + if (!DeadFunctions.empty()) { + // Now that all interferences have been dropped, delete the actual objects + // themselves. + for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) { + RemoveUnusedGlobalValue(*DeadFunctions[i]); + M.getFunctionList().erase(DeadFunctions[i]); + } + NumFunctions += DeadFunctions.size(); + Changed = true; + } + + if (!DeadGlobalVars.empty()) { + for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) { + RemoveUnusedGlobalValue(*DeadGlobalVars[i]); + M.getGlobalList().erase(DeadGlobalVars[i]); + } + NumVariables += DeadGlobalVars.size(); + Changed = true; + } + + // Now delete any dead aliases. 
+  if (!DeadAliases.empty()) {
+    for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
+      RemoveUnusedGlobalValue(*DeadAliases[i]);
+      M.getAliasList().erase(DeadAliases[i]);
+    }
+    NumAliases += DeadAliases.size();
+    Changed = true;
+  }
+
+  // Make sure that all memory is released
+  AliveGlobals.clear();
+
+  return Changed;
+}
+
+/// GlobalIsNeeded - Mark the specified global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+  // If the global is already in the set, no need to reprocess it.
+  if (!AliveGlobals.insert(G))
+    return;
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+    // If this is a global variable, we must make sure to add any global values
+    // referenced by the initializer to the alive set.
+    if (GV->hasInitializer())
+      MarkUsedGlobalsAsNeeded(GV->getInitializer());
+  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+    // The target of a global alias is needed.
+    MarkUsedGlobalsAsNeeded(GA->getAliasee());
+  } else {
+    // Otherwise this must be a function object. We have to scan the body of
+    // the function looking for constants and global values which are used as
+    // operands. Any operands of these types must be processed to ensure that
+    // any globals used will be marked as needed.
+    Function *F = cast<Function>(G);
+
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+        for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
+          if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+            GlobalIsNeeded(GV);
+          else if (Constant *C = dyn_cast<Constant>(*U))
+            MarkUsedGlobalsAsNeeded(C);
+  }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    return GlobalIsNeeded(GV);
+
+  // Loop over all of the operands of the constant, adding any globals they
+  // use to the list of needed globals.
+  for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
+    if (Constant *OpC = dyn_cast<Constant>(*I))
+      MarkUsedGlobalsAsNeeded(OpC);
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+  if (GV.use_empty()) return false;
+  GV.removeDeadConstantUsers();
+  return GV.use_empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 0000000..b429213
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,2569 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken. Where this is obviously the case, it marks read/write globals as
+// constant, deletes variables that are only stored to, etc.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globalopt" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); +STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); +STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); +STATISTIC(NumDeleted , "Number of globals deleted"); +STATISTIC(NumFnDeleted , "Number of functions deleted"); +STATISTIC(NumGlobUses , "Number of global uses devirtualized"); +STATISTIC(NumLocalized , "Number of globals localized"); +STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); +STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); +STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); +STATISTIC(NumNestRemoved , "Number of nest attributes removed"); +STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); +STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); + +namespace { + struct GlobalOpt : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + } + static char ID; // Pass identification, replacement for typeid + GlobalOpt() : ModulePass(&ID) {} + + bool runOnModule(Module &M); + + private: + GlobalVariable *FindGlobalCtors(Module &M); + bool OptimizeFunctions(Module &M); + bool OptimizeGlobalVars(Module &M); + bool OptimizeGlobalAliases(Module &M); + bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); + bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + }; +} + +char GlobalOpt::ID = 0; +static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer"); + +ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } + +namespace { + +/// GlobalStatus - As we analyze each global, keep track of some information +/// about it. If we find out that the address of the global is taken, none of +/// this info will be accurate. +struct GlobalStatus { + /// isLoaded - True if the global is ever loaded. If the global isn't ever + /// loaded it can be deleted. + bool isLoaded; + + /// StoredType - Keep track of what stores to the global look like. + /// + enum StoredType { + /// NotStored - There is no store to this global. It can thus be marked + /// constant. + NotStored, + + /// isInitializerStored - This global is stored to, but the only thing + /// stored is the constant it was initialized with. This is only tracked + /// for scalar globals. + isInitializerStored, + + /// isStoredOnce - This global is stored to, but only its initializer and + /// one other value is ever stored to it. 
If this global isStoredOnce, we + /// track the value stored to it in StoredOnceValue below. This is only + /// tracked for scalar globals. + isStoredOnce, + + /// isStored - This global is stored to by multiple values or something else + /// that we cannot track. + isStored + } StoredType; + + /// StoredOnceValue - If only one value (besides the initializer constant) is + /// ever stored to this global, keep track of what value it is. + Value *StoredOnceValue; + + /// AccessingFunction/HasMultipleAccessingFunctions - These start out + /// null/false. When the first accessing function is noticed, it is recorded. + /// When a second different accessing function is noticed, + /// HasMultipleAccessingFunctions is set to true. + const Function *AccessingFunction; + bool HasMultipleAccessingFunctions; + + /// HasNonInstructionUser - Set to true if this global has a user that is not + /// an instruction (e.g. a constant expr or GV initializer). + bool HasNonInstructionUser; + + /// HasPHIUser - Set to true if this global has a user that is a PHI node. + bool HasPHIUser; + + GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), + AccessingFunction(0), HasMultipleAccessingFunctions(false), + HasNonInstructionUser(false), HasPHIUser(false) {} +}; + +} + +// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used +// by constants itself. Note that constants cannot be cyclic, so this test is +// pretty easy to implement recursively. +// +static bool SafeToDestroyConstant(const Constant *C) { + if (isa<GlobalValue>(C)) return false; + + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) + if (const Constant *CU = dyn_cast<Constant>(*UI)) { + if (!SafeToDestroyConstant(CU)) return false; + } else + return false; + return true; +} + + +/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus +/// structure. If the global has its address taken, return true to indicate we +/// can't do anything with it. +/// +static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, + SmallPtrSet<const PHINode*, 16> &PHIUsers) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { + GS.HasNonInstructionUser = true; + + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; + + } else if (const Instruction *I = dyn_cast<Instruction>(*UI)) { + if (!GS.HasMultipleAccessingFunctions) { + const Function *F = I->getParent()->getParent(); + if (GS.AccessingFunction == 0) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + GS.isLoaded = true; + if (LI->isVolatile()) return true; // Don't hack on volatile loads. + } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) return true; + + if (SI->isVolatile()) return true; // Don't hack on volatile stores. + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. 
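+        // (The ordering comparisons below rely on the StoredType enumerators
+        // running from weakest (NotStored) to strongest (isStored).)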
+ if (GS.StoredType != GlobalStatus::isStored) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>( + SI->getOperand(1))) { + Value *StoredVal = SI->getOperand(0); + if (StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::isInitializerStored) + GS.StoredType = GlobalStatus::isInitializerStored; + } else if (isa<LoadInst>(StoredVal) && + cast<LoadInst>(StoredVal)->getOperand(0) == GV) { + if (GS.StoredType < GlobalStatus::isInitializerStored) + GS.StoredType = GlobalStatus::isInitializerStored; + } else if (GS.StoredType < GlobalStatus::isStoredOnce) { + GS.StoredType = GlobalStatus::isStoredOnce; + GS.StoredOnceValue = StoredVal; + } else if (GS.StoredType == GlobalStatus::isStoredOnce && + GS.StoredOnceValue == StoredVal) { + // noop. + } else { + GS.StoredType = GlobalStatus::isStored; + } + } else { + GS.StoredType = GlobalStatus::isStored; + } + } + } else if (isa<GetElementPtrInst>(I)) { + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + } else if (isa<SelectInst>(I)) { + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { + // PHI nodes we can check just like select or GEP instructions, but we + // have to be careful about infinite recursion. + if (PHIUsers.insert(PN)) // Not already visited. + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + GS.HasPHIUser = true; + } else if (isa<CmpInst>(I)) { + } else if (isa<MemTransferInst>(I)) { + if (I->getOperand(1) == V) + GS.StoredType = GlobalStatus::isStored; + if (I->getOperand(2) == V) + GS.isLoaded = true; + } else if (isa<MemSetInst>(I)) { + assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + GS.StoredType = GlobalStatus::isStored; + } else { + return true; // Any other non-load instruction might take address! + } + } else if (const Constant *C = dyn_cast<Constant>(*UI)) { + GS.HasNonInstructionUser = true; + // We might have a dead and dangling constant hanging off of here. + if (!SafeToDestroyConstant(C)) + return true; + } else { + GS.HasNonInstructionUser = true; + // Otherwise must be some other user. + return true; + } + + return false; +} + +static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { + ConstantInt *CI = dyn_cast<ConstantInt>(Idx); + if (!CI) return 0; + unsigned IdxV = CI->getZExtValue(); + + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) { + if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); + } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) { + if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); + } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) { + if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); + } else if (isa<ConstantAggregateZero>(Agg)) { + if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { + if (IdxV < STy->getNumElements()) + return Constant::getNullValue(STy->getElementType(IdxV)); + } else if (const SequentialType *STy = + dyn_cast<SequentialType>(Agg->getType())) { + return Constant::getNullValue(STy->getElementType()); + } + } else if (isa<UndefValue>(Agg)) { + if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { + if (IdxV < STy->getNumElements()) + return UndefValue::get(STy->getElementType(IdxV)); + } else if (const SequentialType *STy = + dyn_cast<SequentialType>(Agg->getType())) { + return UndefValue::get(STy->getElementType()); + } + } + return 0; +} + + +/// CleanupConstantGlobalUsers - We just marked GV constant. 
Loop over all +/// users of the global, cleaning up the obvious ones. This is largely just a +/// quick scan over the use list to clean up the easy and obvious cruft. This +/// returns true if it made a change. +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { + bool Changed = false; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { + User *U = *UI++; + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + if (Init) { + // Replace the load with the initializer. + LI->replaceAllUsesWith(Init); + LI->eraseFromParent(); + Changed = true; + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // Store must be unreachable or storing Init into the global. + SI->eraseFromParent(); + Changed = true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Constant *SubInit = 0; + if (Init) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + Changed |= CleanupConstantGlobalUsers(CE, SubInit); + } else if (CE->getOpcode() == Instruction::BitCast && + CE->getType()->isPointerTy()) { + // Pointer cast, delete any stores and memsets to the global. + Changed |= CleanupConstantGlobalUsers(CE, 0); + } + + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + // Do not transform "gepinst (gep constexpr (GV))" here, because forming + // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold + // and will invalidate our notion of what Init is. + Constant *SubInit = 0; + if (!isa<ConstantExpr>(GEP->getOperand(0))) { + ConstantExpr *CE = + dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); + if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + } + Changed |= CleanupConstantGlobalUsers(GEP, SubInit); + + if (GEP->use_empty()) { + GEP->eraseFromParent(); + Changed = true; + } + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv + if (MI->getRawDest() == V) { + MI->eraseFromParent(); + Changed = true; + } + + } else if (Constant *C = dyn_cast<Constant>(U)) { + // If we have a chain of dead constantexprs or other things dangling from + // us, and if they are all dead, nuke them without remorse. + if (SafeToDestroyConstant(C)) { + C->destroyConstant(); + // This could have invalidated UI, start over from scratch. + CleanupConstantGlobalUsers(V, Init); + return true; + } + } + } + return Changed; +} + +/// isSafeSROAElementUse - Return true if the specified instruction is a safe +/// user of a derived expression from a global that we want to SROA. +static bool isSafeSROAElementUse(Value *V) { + // We might have a dead and dangling constant hanging off of here. + if (Constant *C = dyn_cast<Constant>(V)) + return SafeToDestroyConstant(C); + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // Loads are ok. + if (isa<LoadInst>(I)) return true; + + // Stores *to* the pointer are ok. + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getOperand(0) != V; + + // Otherwise, it must be a GEP. 
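+ // For illustration (hypothetical IR), an acceptable GEP use looks like
+ //   %p = getelementptr [4 x i32]* %elt, i32 0, i32 1
+ // i.e. at least three operands with a leading constant-zero index, and all of
+ // %p's own uses must in turn pass this same check.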
+ GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); + if (GEPI == 0) return false; + + if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || + !cast<Constant>(GEPI->getOperand(1))->isNullValue()) + return false; + + for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); + I != E; ++I) + if (!isSafeSROAElementUse(*I)) + return false; + return true; +} + + +/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. +/// Look at it and its uses and decide whether it is safe to SROA this global. +/// +static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { + // The user of the global must be a GEP Inst or a ConstantExpr GEP. + if (!isa<GetElementPtrInst>(U) && + (!isa<ConstantExpr>(U) || + cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) + return false; + + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we + // don't like < 3 operand CE's, and we don't like non-constant integer + // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some + // value of C. + if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || + !cast<Constant>(U->getOperand(1))->isNullValue() || + !isa<ConstantInt>(U->getOperand(2))) + return false; + + gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); + ++GEPI; // Skip over the pointer index. + + // If this is a use of an array allocation, do a bit more checking for sanity. + if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { + uint64_t NumElements = AT->getNumElements(); + ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); + + // Check to make sure that index falls within the array. If not, + // something funny is going on, so we won't do the optimization. + // + if (Idx->getZExtValue() >= NumElements) + return false; + + // We cannot scalar repl this level of the array unless any array + // sub-indices are in-range constants. In particular, consider: + // A[0][i]. We cannot know that the user isn't doing invalid things like + // allowing i to index an out-of-range subscript that accesses A[1]. + // + // Scalar replacing *just* the outer index of the array is probably not + // going to be a win anyway, so just give up. + for (++GEPI; // Skip array index. + GEPI != E; + ++GEPI) { + uint64_t NumElements; + if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) + NumElements = SubArrayTy->getNumElements(); + else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) + NumElements = SubVectorTy->getNumElements(); + else { + assert((*GEPI)->isStructTy() && + "Indexed GEP type is not array, vector, or struct!"); + continue; + } + + ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); + if (!IdxVal || IdxVal->getZExtValue() >= NumElements) + return false; + } + } + + for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) + if (!isSafeSROAElementUse(*I)) + return false; + return true; +} + +/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it +/// is safe for us to perform this transformation. +/// +static bool GlobalUsersSafeToSRA(GlobalValue *GV) { + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + if (!IsUserOfGlobalSafeForSRA(*UI, GV)) + return false; + } + return true; +} + + +/// SRAGlobal - Perform scalar replacement of aggregates on the specified global +/// variable. This opens the door for other optimizations by exposing the +/// behavior of the program in a more fine-grained way. 
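+/// For illustration (hypothetical IR), splitting
+///   @G = internal global { i32, double } { i32 1, double 2.0 }
+/// produces
+///   @G.0 = internal global i32 1
+///   @G.1 = internal global double 2.0
+/// and each 'getelementptr @G, 0, N, ...' use is rewritten to address @G.N.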
We have determined that +/// this transformation is safe already. We return the first global variable we +/// insert so that the caller can reprocess it. +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { + // Make sure this global only has simple uses that we can SRA. + if (!GlobalUsersSafeToSRA(GV)) + return 0; + + assert(GV->hasLocalLinkage() && !GV->isConstant()); + Constant *Init = GV->getInitializer(); + const Type *Ty = Init->getType(); + + std::vector<GlobalVariable*> NewGlobals; + Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); + + // Get the alignment of the global, either explicit or target-specific. + unsigned StartAlignment = GV->getAlignment(); + if (StartAlignment == 0) + StartAlignment = TD.getABITypeAlignment(GV->getType()); + + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + NewGlobals.reserve(STy->getNumElements()); + const StructLayout &Layout = *TD.getStructLayout(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Constant *In = getAggregateConstantElement(Init, + ConstantInt::get(Type::getInt32Ty(STy->getContext()), i)); + assert(In && "Couldn't get element of initializer?"); + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+Twine(i), + GV->isThreadLocal(), + GV->getType()->getAddressSpace()); + Globals.insert(GV, NGV); + NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + uint64_t FieldOffset = Layout.getElementOffset(i); + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); + if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) + NGV->setAlignment(NewAlign); + } + } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) { + unsigned NumElements = 0; + if (const ArrayType *ATy = dyn_cast<ArrayType>(STy)) + NumElements = ATy->getNumElements(); + else + NumElements = cast<VectorType>(STy)->getNumElements(); + + if (NumElements > 16 && GV->hasNUsesOrMore(16)) + return 0; // It's not worth it. + NewGlobals.reserve(NumElements); + + uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); + unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); + for (unsigned i = 0, e = NumElements; i != e; ++i) { + Constant *In = getAggregateConstantElement(Init, + ConstantInt::get(Type::getInt32Ty(Init->getContext()), i)); + assert(In && "Couldn't get element of initializer?"); + + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+Twine(i), + GV->isThreadLocal(), + GV->getType()->getAddressSpace()); + Globals.insert(GV, NGV); + NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); + if (NewAlign > EltAlign) + NGV->setAlignment(NewAlign); + } + } + + if (NewGlobals.empty()) + return 0; + + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); + + Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); + + // Loop over all of the uses of the global, replacing the constantexpr geps, + // with smaller constantexpr geps or direct references. 
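+ // For example (illustrative only), once @G is split into @G.0 and @G.1:
+ //   getelementptr @G, i32 0, i32 1          -> @G.1 (direct reference)
+ //   getelementptr @G, i32 0, i32 1, i32 3   -> getelementptr @G.1, i32 0, i32 3
+ // The leading index stays zero; the old field index selects the new global.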
+ while (!GV->use_empty()) { + User *GEP = GV->use_back(); + assert(((isa<ConstantExpr>(GEP) && + cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| + isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); + + // Ignore the 1th operand, which has to be zero or else the program is quite + // broken (undefined). Get the 2nd operand, which is the structure or array + // index. + unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); + if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. + + Value *NewPtr = NewGlobals[Val]; + + // Form a shorter GEP if needed. + if (GEP->getNumOperands() > 3) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { + SmallVector<Constant*, 8> Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) + Idxs.push_back(CE->getOperand(i)); + NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), + &Idxs[0], Idxs.size()); + } else { + GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); + SmallVector<Value*, 8> Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) + Idxs.push_back(GEPI->getOperand(i)); + NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), + GEPI->getName()+"."+Twine(Val),GEPI); + } + } + GEP->replaceAllUsesWith(NewPtr); + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) + GEPI->eraseFromParent(); + else + cast<ConstantExpr>(GEP)->destroyConstant(); + } + + // Delete the old global, now that it is dead. + Globals.erase(GV); + ++NumSRA; + + // Loop over the new globals array deleting any globals that are obviously + // dead. This can arise due to scalarization of a structure or an array that + // has elements that are dead. + unsigned FirstGlobal = 0; + for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) + if (NewGlobals[i]->use_empty()) { + Globals.erase(NewGlobals[i]); + if (FirstGlobal == i) ++FirstGlobal; + } + + return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; +} + +/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified +/// value will trap if the value is dynamically null. PHIs keeps track of any +/// phi nodes we've seen to avoid reprocessing them. +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSet<const PHINode*, 8> &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + + if (isa<LoadInst>(U)) { + // Will trap. + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Storing the value. + } + } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { + if (CI->getCalledValue() != V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Not calling the ptr + } + } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { + if (II->getCalledValue() != V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Not calling the ptr + } + } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { + if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; + } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { + // If we've already seen this phi node, ignore it, it has already been + // checked. 
+ if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) + return false; + } else if (isa<ICmpInst>(U) && + isa<ConstantPointerNull>(UI->getOperand(1))) { + // Ignore icmp X, null + } else { + //cerr << "NONTRAPPING USE: " << *U; + return false; + } + } + return true; +} + +/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads +/// from GV will trap if the loaded value is null. Note that this also permits +/// comparisons of the loaded value against null, as a special case. +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + const User *U = *UI; + + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + SmallPtrSet<const PHINode*, 8> PHIs; + if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) + return false; + } else if (isa<StoreInst>(U)) { + // Ignore stores to the global. + } else { + // We don't know or understand this user, bail out. + //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; + return false; + } + } + return true; +} + +static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { + bool Changed = false; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { + Instruction *I = cast<Instruction>(*UI++); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + LI->setOperand(0, NewV); + Changed = true; + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (SI->getOperand(1) == V) { + SI->setOperand(1, NewV); + Changed = true; + } + } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { + CallSite CS(I); + if (CS.getCalledValue() == V) { + // Calling through the pointer! Turn into a direct call, but be careful + // that the pointer is not also being passed as an argument. + CS.setCalledFunction(NewV); + Changed = true; + bool PassedAsArg = false; + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.getArgument(i) == V) { + PassedAsArg = true; + CS.setArgument(i, NewV); + } + + if (PassedAsArg) { + // Being passed as an argument also. Be careful to not invalidate UI! + UI = V->use_begin(); + } + } + } else if (CastInst *CI = dyn_cast<CastInst>(I)) { + Changed |= OptimizeAwayTrappingUsesOfValue(CI, + ConstantExpr::getCast(CI->getOpcode(), + NewV, CI->getType())); + if (CI->use_empty()) { + Changed = true; + CI->eraseFromParent(); + } + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { + // Should handle GEP here. + SmallVector<Constant*, 8> Idxs; + Idxs.reserve(GEPI->getNumOperands()-1); + for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); + i != e; ++i) + if (Constant *C = dyn_cast<Constant>(*i)) + Idxs.push_back(C); + else + break; + if (Idxs.size() == GEPI->getNumOperands()-1) + Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, + ConstantExpr::getGetElementPtr(NewV, &Idxs[0], + Idxs.size())); + if (GEPI->use_empty()) { + Changed = true; + GEPI->eraseFromParent(); + } + } + } + + return Changed; +} + + +/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null +/// value stored into it. If there are uses of the loaded value that would trap +/// if the loaded value is dynamically null, then we know that they cannot be +/// reachable with a null optimize away the load. +static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { + bool Changed = false; + + // Keep track of whether we are able to remove all the uses of the global + // other than the store that defines it. 
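+ // A minimal sketch (hypothetical IR) of the rewrite this performs: with the
+ // stored value LV = @F and
+ //   %fp = load void()** @G
+ //   call void %fp()
+ // the call would trap if %fp were null, so it is safe to rewrite it to
+ // 'call void @F()' and, once the load has no other uses, delete the load.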
+ bool AllNonStoreUsesGone = true; + + // Replace all uses of loads with uses of uses of the stored value. + for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ + User *GlobalUser = *GUI++; + if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { + Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); + // If we were able to delete all uses of the loads + if (LI->use_empty()) { + LI->eraseFromParent(); + Changed = true; + } else { + AllNonStoreUsesGone = false; + } + } else if (isa<StoreInst>(GlobalUser)) { + // Ignore the store that stores "LV" to the global. + assert(GlobalUser->getOperand(1) == GV && + "Must be storing *to* the global"); + } else { + AllNonStoreUsesGone = false; + + // If we get here we could have other crazy uses that are transitively + // loaded. + assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || + isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!"); + } + } + + if (Changed) { + DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); + ++NumGlobUses; + } + + // If we nuked all of the loads, then none of the stores are needed either, + // nor is the global. + if (AllNonStoreUsesGone) { + DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); + CleanupConstantGlobalUsers(GV, 0); + if (GV->use_empty()) { + GV->eraseFromParent(); + ++NumDeleted; + } + Changed = true; + } + return Changed; +} + +/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the +/// instructions that are foldable. +static void ConstantPropUsersOf(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) + if (Instruction *I = dyn_cast<Instruction>(*UI++)) + if (Constant *NewC = ConstantFoldInstruction(I)) { + I->replaceAllUsesWith(NewC); + + // Advance UI to the next non-I use to avoid invalidating it! + // Instructions could multiply use V. + while (UI != E && *UI == I) + ++UI; + I->eraseFromParent(); + } +} + +/// OptimizeGlobalAddressOfMalloc - This function takes the specified global +/// variable, and transforms the program as if it always contained the result of +/// the specified malloc. Because it is always the result of the specified +/// malloc, there is no reason to actually DO the malloc. Instead, turn the +/// malloc into a global, and any loads of GV as uses of the new global. +static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, + CallInst *CI, + const Type *AllocTy, + ConstantInt *NElements, + TargetData* TD) { + DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); + + const Type *GlobalType; + if (NElements->getZExtValue() == 1) + GlobalType = AllocTy; + else + // If we have an array allocation, the global variable is of an array. + GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); + + // Create the new global variable. The contents of the malloc'd memory is + // undefined, so initialize with an undef value. + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + GlobalType, false, + GlobalValue::InternalLinkage, + UndefValue::get(GlobalType), + GV->getName()+".body", + GV, + GV->isThreadLocal()); + + // If there are bitcast users of the malloc (which is typical, usually we have + // a malloc + bitcast) then replace them with uses of the new global. Update + // other users to use the global as well. 
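+ // For illustration (hypothetical IR), if the malloc'd memory was promoted to
+ // '@G.body = internal global i32 undef':
+ //   %mem = call i8* @malloc(i32 4)
+ //   %p   = bitcast i8* %mem to i32*    ; replaced by a direct use of @G.body
+ // and any remaining i8* uses of %mem are routed through one new bitcast of
+ // @G.body back to i8*.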
+ BitCastInst *TheBC = 0; + while (!CI->use_empty()) { + Instruction *User = cast<Instruction>(CI->use_back()); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { + if (BCI->getType() == NewGV->getType()) { + BCI->replaceAllUsesWith(NewGV); + BCI->eraseFromParent(); + } else { + BCI->setOperand(0, NewGV); + } + } else { + if (TheBC == 0) + TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); + User->replaceUsesOfWith(CI, TheBC); + } + } + + Constant *RepValue = NewGV; + if (NewGV->getType() != GV->getType()->getElementType()) + RepValue = ConstantExpr::getBitCast(RepValue, + GV->getType()->getElementType()); + + // If there is a comparison against null, we will insert a global bool to + // keep track of whether the global was initialized yet or not. + GlobalVariable *InitBool = + new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), + GV->getName()+".init", GV->isThreadLocal()); + bool InitBoolUsed = false; + + // Loop over all uses of GV, processing them in turn. + while (!GV->use_empty()) { + if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) { + // The global is initialized when the store to it occurs. + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI); + SI->eraseFromParent(); + continue; + } + + LoadInst *LI = cast<LoadInst>(GV->use_back()); + while (!LI->use_empty()) { + Use &LoadUse = LI->use_begin().getUse(); + if (!isa<ICmpInst>(LoadUse.getUser())) { + LoadUse = RepValue; + continue; + } + + ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); + // Replace the cmp X, 0 with a use of the bool value. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); + InitBoolUsed = true; + switch (ICI->getPredicate()) { + default: llvm_unreachable("Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: // X < null -> always false + LV = ConstantInt::getFalse(GV->getContext()); + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", ICI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. + } + ICI->replaceAllUsesWith(LV); + ICI->eraseFromParent(); + } + LI->eraseFromParent(); + } + + // If the initialization boolean was used, insert it, otherwise delete it. + if (!InitBoolUsed) { + while (!InitBool->use_empty()) // Delete initializations + cast<StoreInst>(InitBool->use_back())->eraseFromParent(); + delete InitBool; + } else + GV->getParent()->getGlobalList().insert(GV, InitBool); + + // Now the GV is dead, nuke it and the malloc.. + GV->eraseFromParent(); + CI->eraseFromParent(); + + // To further other optimizations, loop over all users of NewGV and try to + // constant prop them. This will promote GEP instructions with constant + // indices into GEP constant-exprs, which will allow global-opt to hack on it. + ConstantPropUsersOf(NewGV); + if (RepValue != NewGV) + ConstantPropUsersOf(RepValue); + + return NewGV; +} + +/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking +/// to make sure that there are no complex uses of V. We permit simple things +/// like dereferencing the pointer, but not storing through the address, unless +/// it is to the specified global. 
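+/// A rough sketch of the intent (hypothetical IR), where V is %p and GV is @G:
+///   %x = load i32* %p            ; dereferencing %p: fine
+///   store i32 0, i32* %p         ; storing through %p: fine
+///   store i32* %p, i32** @G      ; storing %p itself into @G: fine
+///   store i32* %p, i32** @other  ; storing %p itself anywhere else: rejected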
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSet<const PHINode*, 8> &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + const Instruction *Inst = cast<Instruction>(*UI); + + if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { + continue; // Fine, ignore. + } + + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->getOperand(0) == V && SI->getOperand(1) != GV) + return false; // Storing the pointer itself... bad. + continue; // Otherwise, storing through it, or storing into GV... fine. + } + + // Must index into the array and into the struct. + if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) + return false; + continue; + } + + if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { + // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI + // cycles. + if (PHIs.insert(PN)) + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) + return false; + continue; + } + + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) + return false; + continue; + } + + return false; + } + return true; +} + +/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV +/// somewhere. Transform all uses of the allocation into loads from the +/// global and uses of the resultant pointer. Further, delete the store into +/// GV. This assumes that these value pass the +/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, + GlobalVariable *GV) { + while (!Alloc->use_empty()) { + Instruction *U = cast<Instruction>(*Alloc->use_begin()); + Instruction *InsertPt = U; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // If this is the store of the allocation into the global, remove it. + if (SI->getOperand(1) == GV) { + SI->eraseFromParent(); + continue; + } + } else if (PHINode *PN = dyn_cast<PHINode>(U)) { + // Insert the load in the corresponding predecessor, not right before the + // PHI. + InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator(); + } else if (isa<BitCastInst>(U)) { + // Must be bitcast between the malloc and store to initialize the global. + ReplaceUsesOfMallocWithGlobal(U, GV); + U->eraseFromParent(); + continue; + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + // If this is a "GEP bitcast" and the user is a store to the global, then + // just process it as a bitcast. + if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) + if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back())) + if (SI->getOperand(1) == GV) { + // Must be bitcast GEP between the malloc and store to initialize + // the global. + ReplaceUsesOfMallocWithGlobal(GEPI, GV); + GEPI->eraseFromParent(); + continue; + } + } + + // Insert a load from the global, and use it instead of the malloc. + Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); + U->replaceUsesOfWith(Alloc, NL); + } +} + +/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi +/// of a load) are simple enough to perform heap SRA on. This permits GEP's +/// that index through the array and struct field, icmps of null, and PHIs. 
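+/// For illustration (hypothetical IR), with %v being such a load:
+///   %c = icmp eq %struct.T* %v, null                  ; ok
+///   %f = getelementptr %struct.T* %v, i32 %i, i32 1   ; array + field index: ok
+///   %w = phi %struct.T* [ %v, %bb1 ], [ %v2, %bb2 ]   ; ok if %v2 is another such load
+/// Anything else (a store of %v, a call, a cast, ...) disqualifies the load.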
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, + SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, + SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { + // We permit two users of the load: setcc comparing against the null + // pointer, and a getelementptr of a specific form. + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const Instruction *User = cast<Instruction>(*UI); + + // Comparison against null is ok. + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { + if (!isa<ConstantPointerNull>(ICI->getOperand(1))) + return false; + continue; + } + + // getelementptr is also ok, but only a simple form. + if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { + // Must index into the array and into the struct. + if (GEPI->getNumOperands() < 3) + return false; + + // Otherwise the GEP is ok. + continue; + } + + if (const PHINode *PN = dyn_cast<PHINode>(User)) { + if (!LoadUsingPHIsPerLoad.insert(PN)) + // This means some phi nodes are dependent on each other. + // Avoid infinite looping! + return false; + if (!LoadUsingPHIs.insert(PN)) + // If we have already analyzed this PHI, then it is safe. + continue; + + // Make sure all uses of the PHI are simple enough to transform. + if (!LoadUsesSimpleEnoughForHeapSRA(PN, + LoadUsingPHIs, LoadUsingPHIsPerLoad)) + return false; + + continue; + } + + // Otherwise we don't know what this is, not ok. + return false; + } + + return true; +} + + +/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from +/// GV are simple enough to perform HeapSRA, return true. +static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, + Instruction *StoredVal) { + SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; + SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) + if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, + LoadUsingPHIsPerLoad)) + return false; + LoadUsingPHIsPerLoad.clear(); + } + + // If we reach here, we know that all uses of the loads and transitive uses + // (through PHI nodes) are simple enough to transform. However, we don't know + // that all inputs the to the PHI nodes are in the same equivalence sets. + // Check to verify that all operands of the PHIs are either PHIS that can be + // transformed, loads from GV, or MI itself. + for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() + , E = LoadUsingPHIs.end(); I != E; ++I) { + const PHINode *PN = *I; + for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { + Value *InVal = PN->getIncomingValue(op); + + // PHI of the stored value itself is ok. + if (InVal == StoredVal) continue; + + if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) { + // One of the PHIs in our set is (optimistically) ok. + if (LoadUsingPHIs.count(InPN)) + continue; + return false; + } + + // Load from GV is ok. + if (const LoadInst *LI = dyn_cast<LoadInst>(InVal)) + if (LI->getOperand(0) == GV) + continue; + + // UNDEF? NULL? + + // Anything else is rejected. 
+ return false; + } + } + + return true; +} + +static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, + DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; + + if (FieldNo >= FieldVals.size()) + FieldVals.resize(FieldNo+1); + + // If we already have this value, just reuse the previously scalarized + // version. + if (Value *FieldVal = FieldVals[FieldNo]) + return FieldVal; + + // Depending on what instruction this is, we have several cases. + Value *Result; + if (LoadInst *LI = dyn_cast<LoadInst>(V)) { + // This is a scalarized version of the load from the global. Just create + // a new Load of the scalarized global. + Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, + InsertedScalarizedValues, + PHIsToRewrite), + LI->getName()+".f"+Twine(FieldNo), LI); + } else if (PHINode *PN = dyn_cast<PHINode>(V)) { + // PN's type is pointer to struct. Make a new PHI of pointer to struct + // field. + const StructType *ST = + cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); + + Result = + PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getName()+".f"+Twine(FieldNo), PN); + PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); + } else { + llvm_unreachable("Unknown usable value"); + Result = 0; + } + + return FieldVals[FieldNo] = Result; +} + +/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from +/// the load, rewrite the derived value to use the HeapSRoA'd load. +static void RewriteHeapSROALoadUser(Instruction *LoadUser, + DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + // If this is a comparison against null, handle it. + if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { + assert(isa<ConstantPointerNull>(SCI->getOperand(1))); + // If we have a setcc of the loaded pointer, we can use a setcc of any + // field. + Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, + InsertedScalarizedValues, PHIsToRewrite); + + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, + Constant::getNullValue(NPtr->getType()), + SCI->getName()); + SCI->replaceAllUsesWith(New); + SCI->eraseFromParent(); + return; + } + + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { + assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) + && "Unexpected GEPI!"); + + // Load the pointer for this field. + unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); + Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, + InsertedScalarizedValues, PHIsToRewrite); + + // Create the new GEP idx vector. + SmallVector<Value*, 8> GEPIdx; + GEPIdx.push_back(GEPI->getOperand(1)); + GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); + + Value *NGEPI = GetElementPtrInst::Create(NewPtr, + GEPIdx.begin(), GEPIdx.end(), + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NGEPI); + GEPI->eraseFromParent(); + return; + } + + // Recursively transform the users of PHI nodes. This will lazily create the + // PHIs that are needed for individual elements. Keep track of what PHIs we + // see in InsertedScalarizedValues so that we don't get infinite loops (very + // antisocial). 
If the PHI is already in InsertedScalarizedValues, it has + // already been seen first by another load, so its uses have already been + // processed. + PHINode *PN = cast<PHINode>(LoadUser); + bool Inserted; + DenseMap<Value*, std::vector<Value*> >::iterator InsertPos; + tie(InsertPos, Inserted) = + InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); + if (!Inserted) return; + + // If this is the first time we've seen this PHI, recursively process all + // users. + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { + Instruction *User = cast<Instruction>(*UI++); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + } +} + +/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr +/// is a value loaded from the global. Eliminate all uses of Ptr, making them +/// use FieldGlobals instead. All uses of loaded values satisfy +/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, + DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); + UI != E; ) { + Instruction *User = cast<Instruction>(*UI++); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + } + + if (Load->use_empty()) { + Load->eraseFromParent(); + InsertedScalarizedValues.erase(Load); + } +} + +/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break +/// it up into multiple allocations of arrays of the fields. +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, + Value* NElems, TargetData *TD) { + DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); + const Type* MAT = getMallocAllocatedType(CI); + const StructType *STy = cast<StructType>(MAT); + + // There is guaranteed to be at least one use of the malloc (storing + // it into GV). If there are other uses, change them to be uses of + // the global to simplify later code. This also deletes the store + // into GV. + ReplaceUsesOfMallocWithGlobal(CI, GV); + + // Okay, at this point, there are no users of the malloc. Insert N + // new mallocs at the same place as CI, and N globals. + std::vector<Value*> FieldGlobals; + std::vector<Value*> FieldMallocs; + + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ + const Type *FieldTy = STy->getElementType(FieldNo); + const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + + GlobalVariable *NGV = + new GlobalVariable(*GV->getParent(), + PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), + GV->getName() + ".f" + Twine(FieldNo), GV, + GV->isThreadLocal()); + FieldGlobals.push_back(NGV); + + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); + if (const StructType *ST = dyn_cast<StructType>(FieldTy)) + TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, + ConstantInt::get(IntPtrTy, TypeSize), + NElems, + CI->getName() + ".f" + Twine(FieldNo)); + FieldMallocs.push_back(NMI); + new StoreInst(NMI, NGV, CI); + } + + // The tricky aspect of this transformation is handling the case when malloc + // fails. In the original code, malloc failing would set the result pointer + // of malloc to null. In this case, some mallocs could succeed and others + // could fail. 
As such, we emit code that looks like this: + // F0 = malloc(field0) + // F1 = malloc(field1) + // F2 = malloc(field2) + // if (F0 == 0 || F1 == 0 || F2 == 0) { + // if (F0) { free(F0); F0 = 0; } + // if (F1) { free(F1); F1 = 0; } + // if (F2) { free(F2); F2 = 0; } + // } + // The malloc can also fail if its argument is too large. + Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1), + ConstantZero, "isneg"); + for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { + Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); + } + + // Split the basic block at the old malloc. + BasicBlock *OrigBB = CI->getParent(); + BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); + + // Create the block to check the first condition. Put all these blocks at the + // end of the function as they are unlikely to be executed. + BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), + "malloc_ret_null", + OrigBB->getParent()); + + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond + // branch on RunningOr. + OrigBB->getTerminator()->eraseFromParent(); + BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); + + // Within the NullPtrBlock, we need to emit a comparison and branch for each + // pointer, because some may be null while others are not. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Constant::getNullValue(GVVal->getType()), + "tmp"); + BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", + OrigBB->getParent()); + BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", + OrigBB->getParent()); + Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, + Cmp, NullPtrBlock); + + // Fill in FreeBlock. + CallInst::CreateFree(GVVal, BI); + new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], + FreeBlock); + BranchInst::Create(NextBlock, FreeBlock); + + NullPtrBlock = NextBlock; + } + + BranchInst::Create(ContBB, NullPtrBlock); + + // CI is no longer needed, remove it. + CI->eraseFromParent(); + + /// InsertedScalarizedLoads - As we process loads, if we can't immediately + /// update all uses of the load, keep track of what scalarized loads are + /// inserted for a given load. + DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; + InsertedScalarizedValues[GV] = FieldGlobals; + + std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; + + // Okay, the malloc site is completely handled. All of the uses of GV are now + // loads, and all uses of those loads are simple. Rewrite them to use loads + // of the per-field globals instead. + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); + continue; + } + + // Must be a store of null. + StoreInst *SI = cast<StoreInst>(User); + assert(isa<ConstantPointerNull>(SI->getOperand(0)) && + "Unexpected heap-sra user!"); + + // Insert a store of null into each global. 
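+ // E.g. (illustrative), a 'store %pair* null, %pair** @G' is expanded into one
+ // null store per field global:
+ //   store i32* null,   i32**   @G.f0
+ //   store float* null, float** @G.f1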
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); + Constant *Null = Constant::getNullValue(PT->getElementType()); + new StoreInst(Null, FieldGlobals[i], SI); + } + // Erase the original store. + SI->eraseFromParent(); + } + + // While we have PHIs that are interesting to rewrite, do it. + while (!PHIsToRewrite.empty()) { + PHINode *PN = PHIsToRewrite.back().first; + unsigned FieldNo = PHIsToRewrite.back().second; + PHIsToRewrite.pop_back(); + PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); + assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); + + // Add all the incoming values. This can materialize more phis. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *InVal = PN->getIncomingValue(i); + InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, + PHIsToRewrite); + FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); + } + } + + // Drop all inter-phi links and any loads that made it this far. + for (DenseMap<Value*, std::vector<Value*> >::iterator + I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast<PHINode>(I->first)) + PN->dropAllReferences(); + else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) + LI->dropAllReferences(); + } + + // Delete all the phis and loads now that inter-references are dead. + for (DenseMap<Value*, std::vector<Value*> >::iterator + I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast<PHINode>(I->first)) + PN->eraseFromParent(); + else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) + LI->eraseFromParent(); + } + + // The old global is now dead, remove it. + GV->eraseFromParent(); + + ++NumHeapSRA; + return cast<GlobalVariable>(FieldGlobals[0]); +} + +/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a +/// pointer global variable with a single value stored it that is a malloc or +/// cast of malloc. +static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, + CallInst *CI, + const Type *AllocTy, + Module::global_iterator &GVI, + TargetData *TD) { + if (!TD) + return false; + + // If this is a malloc of an abstract type, don't touch it. + if (!AllocTy->isSized()) + return false; + + // We can't optimize this global unless all uses of it are *known* to be + // of the malloc value, not of the null initializer value (consider a use + // that compares the global's value against zero to see if the malloc has + // been reached). To do this, we check to see if all uses of the global + // would trap if the global were null: this proves that they must all + // happen after the malloc. + if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) + return false; + + // We can't optimize this if the malloc itself is used in a complex way, + // for example, being stored into multiple globals. This allows the + // malloc to be stored into the specified global, loaded setcc'd, and + // GEP'd. These are all things we could transform to using the global + // for. + SmallPtrSet<const PHINode*, 8> PHIs; + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) + return false; + + // If we have a global that is only initialized with a fixed size malloc, + // transform the program to use global memory instead of malloc'd memory. 
+ // This eliminates dynamic allocation, avoids an indirection accessing the
+ // data, and exposes the resultant global to further GlobalOpt.
+ // We cannot optimize the malloc if we cannot determine the malloc array size.
+ Value *NElems = getMallocArraySize(CI, TD, true);
+ if (!NElems)
+ return false;
+
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
+ return true;
+ }
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+ if (!AllocSTy)
+ return false;
+
+ // If the structure has an unreasonable number of fields, leave it
+ // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+ // If this is a fixed size array, transform the malloc to be an allocation
+ // of structs. malloc [100 x struct],1 -> malloc struct, 100
+ if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
+ CI->eraseFromParent();
+ CI = dyn_cast<BitCastInst>(Malloc) ?
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
+ }
+
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
+ return true;
+ }
+
+ return false;
+}
+
+// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
+// that only one value (besides its initializer) is ever stored to the global.
+static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ Module::global_iterator &GVI,
+ TargetData *TD) {
+ // Ignore no-op GEPs and bitcasts.
+ StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+ // If we are dealing with a pointer global that is initialized to null and
+ // only has one (non-null) value stored into it, then we can optimize any
+ // users of the loaded value (often calls and loads) that would trap if the
+ // value was null.
+ if (GV->getInitializer()->getType()->isPointerTy() &&
+ GV->getInitializer()->isNullValue()) {
+ if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+ if (GV->getInitializer()->getType() != SOVC->getType())
+ SOVC =
+ ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+ // Optimize away any trapping uses of the loaded value.
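+ // A minimal sketch (hypothetical IR) of what this enables: for
+ //   @G = internal global i32* null
+ // whose only store is 'store i32* @Buf, i32** @G', any load of @G whose uses
+ // would trap on null can be folded to use @Buf directly.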
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) + return true; + } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { + const Type* MallocType = getMallocAllocatedType(CI); + if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + GVI, TD)) + return true; + } + } + + return false; +} + +/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only +/// two values ever stored into GV are its initializer and OtherVal. See if we +/// can shrink the global into a boolean and select between the two values +/// whenever it is used. This exposes the values to other scalar optimizations. +static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { + const Type *GVElType = GV->getType()->getElementType(); + + // If GVElType is already i1, it is already shrunk. If the type of the GV is + // an FP value, pointer or vector, don't do this optimization because a select + // between them is very expensive and unlikely to lead to later + // simplification. In these cases, we typically end up with "cond ? v1 : v2" + // where v1 and v2 both require constant pool loads, a big loss. + if (GVElType == Type::getInt1Ty(GV->getContext()) || + GVElType->isFloatingPointTy() || + GVElType->isPointerTy() || GVElType->isVectorTy()) + return false; + + // Walk the use list of the global seeing if all the uses are load or store. + // If there is anything else, bail out. + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) + if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + return false; + + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); + + // Create the new global, initializing it to false. + GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), + false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), + GV->getName()+".b", + GV->isThreadLocal()); + GV->getParent()->getGlobalList().insert(GV, NewGV); + + Constant *InitVal = GV->getInitializer(); + assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && + "No reason to shrink to bool!"); + + // If initialized to zero and storing one into the global, we can use a cast + // instead of a select to synthesize the desired value. + bool IsOneZero = false; + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) + IsOneZero = InitVal->isNullValue() && CI->isOne(); + + while (!GV->use_empty()) { + Instruction *UI = cast<Instruction>(GV->use_back()); + if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { + // Change the store into a boolean store. + bool StoringOther = SI->getOperand(0) == OtherVal; + // Only do this if we weren't storing a loaded value. + Value *StoreVal; + if (StoringOther || SI->getOperand(0) == InitVal) + StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), + StoringOther); + else { + // Otherwise, we are storing a previously loaded copy. To do this, + // change the copy from copying the original value to just copying the + // bool. + Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); + + // If we're already replaced the input, StoredVal will be a cast or + // select instruction. If not, it will be a load of the original + // global. + if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + assert(LI->getOperand(0) == GV && "Not a copy!"); + // Insert a new load, to preserve the saved value. 
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI); + } else { + assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && + "This is not a form that we understand!"); + StoreVal = StoredVal->getOperand(0); + assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); + } + } + new StoreInst(StoreVal, NewGV, SI); + } else { + // Change the load into a load of bool then a select. + LoadInst *LI = cast<LoadInst>(UI); + LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI); + Value *NSI; + if (IsOneZero) + NSI = new ZExtInst(NLI, LI->getType(), "", LI); + else + NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); + NSI->takeName(LI); + LI->replaceAllUsesWith(NSI); + } + UI->eraseFromParent(); + } + + GV->eraseFromParent(); + return true; +} + + +/// ProcessInternalGlobal - Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. +bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, + Module::global_iterator &GVI) { + SmallPtrSet<const PHINode*, 16> PHIUsers; + GlobalStatus GS; + GV->removeDeadConstantUsers(); + + if (GV->use_empty()) { + DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); + GV->eraseFromParent(); + ++NumDeleted; + return true; + } + + if (!AnalyzeGlobal(GV, GS, PHIUsers)) { +#if 0 + DEBUG(dbgs() << "Global: " << *GV); + DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); + DEBUG(dbgs() << " StoredType = "); + switch (GS.StoredType) { + case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; + case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); + break; + case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; + case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; + } + if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) + DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); + if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) + DEBUG(dbgs() << " AccessingFunction = " + << GS.AccessingFunction->getName() << "\n"); + DEBUG(dbgs() << " HasMultipleAccessingFunctions = " + << GS.HasMultipleAccessingFunctions << "\n"); + DEBUG(dbgs() << " HasNonInstructionUser = " + << GS.HasNonInstructionUser<<"\n"); + DEBUG(dbgs() << "\n"); +#endif + + // If this is a first class global and has only one accessing function + // and this function is main (which we know is not recursive we can make + // this global a local variable) we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non single-value types since we + // are just replacing static memory to stack memory. + // + // If the global is in different address space, don't bring it to stack. 
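+ // Roughly (illustrative IR): a global such as
+ //   @counter = internal global i32 0
+ // that is only touched from a non-recursive 'main' becomes
+ //   %counter = alloca i32
+ //   store i32 0, i32* %counter   ; the initializer, unless it is undef
+ // at the top of main's entry block.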
+ if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && !GS.HasNonInstructionUser && + GV->getType()->getElementType()->isSingleValueType() && + GS.AccessingFunction->getName() == "main" && + GS.AccessingFunction->hasExternalLinkage() && + GV->getType()->getAddressSpace() == 0) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction + ->getEntryBlock().begin()); + const Type* ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + if (!isa<UndefValue>(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.isLoaded) { + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, delete it. + if (GV->use_empty()) { + GV->eraseFromParent(); + ++NumDeleted; + Changed = true; + } + return Changed; + + } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); + GV->setConstant(true); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, just nuke it. + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; + } + } else if (GS.StoredType == GlobalStatus::isStoredOnce) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be the stored value, then delete all stores to the + // global. This allows us to mark it constant. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (isa<UndefValue>(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } else { + GVI = GV; + } + ++NumSubstitute; + return true; + } + + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. + if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, + getAnalysisIfAvailable<TargetData>())) + return true; + + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. 
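+ // For illustration (hypothetical IR): if @G is 'internal global i32 0' and
+ // the only other value ever stored to it is 'i32 42', it becomes
+ //   @G.b = internal global i1 false
+ // Stores write the i1, and each load is rebuilt as
+ //   %b = load i1* @G.b
+ //   %v = select i1 %b, i32 42, i32 0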
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; + return true; + } + } + } + return false; +} + +/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified +/// function, changing them to FastCC. +static void ChangeCalleesToFastCall(Function *F) { + for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ + CallSite User(cast<Instruction>(*UI)); + User.setCallingConv(CallingConv::Fast); + } +} + +static AttrListPtr StripNest(const AttrListPtr &Attrs) { + for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { + if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0) + continue; + + // There can be only one. + return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest); + } + + return Attrs; +} + +static void RemoveNestAttribute(Function *F) { + F->setAttributes(StripNest(F->getAttributes())); + for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ + CallSite User(cast<Instruction>(*UI)); + User.setAttributes(StripNest(User.getAttributes())); + } +} + +bool GlobalOpt::OptimizeFunctions(Module &M) { + bool Changed = false; + // Optimize functions. + for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { + Function *F = FI++; + // Functions without names cannot be referenced outside this module. + if (!F->hasName() && !F->isDeclaration()) + F->setLinkage(GlobalValue::InternalLinkage); + F->removeDeadConstantUsers(); + if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) { + F->eraseFromParent(); + Changed = true; + ++NumFnDeleted; + } else if (F->hasLocalLinkage()) { + if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && + !F->hasAddressTaken()) { + // If this function has C calling conventions, is not a varargs + // function, and is only called directly, promote it to use the Fast + // calling convention. + F->setCallingConv(CallingConv::Fast); + ChangeCalleesToFastCall(F); + ++NumFastCallFns; + Changed = true; + } + + if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && + !F->hasAddressTaken()) { + // The function is not used by a trampoline intrinsic, so it is safe + // to remove the 'nest' attribute. + RemoveNestAttribute(F); + ++NumNestRemoved; + Changed = true; + } + } + } + return Changed; +} + +bool GlobalOpt::OptimizeGlobalVars(Module &M) { + bool Changed = false; + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = GVI++; + // Global variables without names cannot be referenced outside this module. + if (!GV->hasName() && !GV->isDeclaration()) + GV->setLinkage(GlobalValue::InternalLinkage); + // Simplify the initializer. + if (GV->hasInitializer()) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + Constant *New = ConstantFoldConstantExpression(CE, TD); + if (New && New != CE) + GV->setInitializer(New); + } + // Do more involved optimizations if the global is internal. + if (!GV->isConstant() && GV->hasLocalLinkage() && + GV->hasInitializer()) + Changed |= ProcessInternalGlobal(GV, GVI); + } + return Changed; +} + +/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all +/// initializers have an init priority of 65535. 
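
For orientation, a hedged C++ mirror (hypothetical names) of the list shape FindGlobalCtors insists on: an array of { i32 priority, void()* function } pairs whose priorities are all 65535; a null function pointer is treated as a terminator by the ctor-list optimization below.

  struct CtorEntry {
    int    Priority;   // must be 65535, or the pass leaves the list alone
    void (*Fn)();      // a constructor function, or null
  };
  static void init_a() {}
  static void init_b() {}
  static CtorEntry MirrorOfGlobalCtors[] = {
    { 65535, init_a },
    { 65535, init_b },
  };
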
+GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->getName() == "llvm.global_ctors") { + // Found it, verify it's an array of { int, void()* }. + const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); + if (!ATy) return 0; + const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); + if (!STy || STy->getNumElements() != 2 || + !STy->getElementType(0)->isIntegerTy(32)) return 0; + const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); + if (!PFTy) return 0; + const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); + if (!FTy || !FTy->getReturnType()->isVoidTy() || + FTy->isVarArg() || FTy->getNumParams() != 0) + return 0; + + // Verify that the initializer is simple enough for us to handle. + if (!I->hasDefinitiveInitializer()) return 0; + ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); + if (!CA) return 0; + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) { + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return 0; + + // Init priority must be standard. + ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!CI || CI->getZExtValue() != 65535) + return 0; + } else { + return 0; + } + + return I; + } + return 0; +} + +/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, +/// return a list of the functions and null terminator as a vector. +static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + std::vector<Function*> Result; + Result.reserve(CA->getNumOperands()); + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + ConstantStruct *CS = cast<ConstantStruct>(*i); + Result.push_back(dyn_cast<Function>(CS->getOperand(1))); + } + return Result; +} + +/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the +/// specified array, returning the new global to use. +static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, + const std::vector<Function*> &Ctors) { + // If we made a change, reassemble the initializer list. + std::vector<Constant*> CSVals; + CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); + CSVals.push_back(0); + + // Create the new init list. + std::vector<Constant*> CAList; + for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { + if (Ctors[i]) { + CSVals[1] = Ctors[i]; + } else { + const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), + false); + const PointerType *PFTy = PointerType::getUnqual(FTy); + CSVals[1] = Constant::getNullValue(PFTy); + CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), + 2147483647); + } + CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); + } + + // Create the array initializer. + const Type *StructTy = + cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, + CAList.size()), CAList); + + // If we didn't change the number of elements, don't create a new GV. + if (CA->getType() == GCL->getInitializer()->getType()) { + GCL->setInitializer(CA); + return GCL; + } + + // Create the new global and insert it next to the existing list. 
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), + GCL->getLinkage(), CA, "", + GCL->isThreadLocal()); + GCL->getParent()->getGlobalList().insert(GCL, NGV); + NGV->takeName(GCL); + + // Nuke the old list, replacing any uses with the new one. + if (!GCL->use_empty()) { + Constant *V = NGV; + if (V->getType() != GCL->getType()) + V = ConstantExpr::getBitCast(V, GCL->getType()); + GCL->replaceAllUsesWith(V); + } + GCL->eraseFromParent(); + + if (Ctors.size()) + return NGV; + else + return 0; +} + + +static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, + Value *V) { + if (Constant *CV = dyn_cast<Constant>(V)) return CV; + Constant *R = ComputedValues[V]; + assert(R && "Reference to an uncomputed value!"); + return R; +} + +/// isSimpleEnoughPointerToCommit - Return true if this constant is simple +/// enough for us to understand. In particular, if it is a cast of something, +/// we punt. We basically just support direct accesses to globals and GEP's of +/// globals. This should be kept up to date with CommitValueTo. +static bool isSimpleEnoughPointerToCommit(Constant *C) { + // Conservatively, avoid aggregate types. This is because we don't + // want to worry about them partially overlapping other stores. + if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) + return false; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + // Do not allow weak/linkonce/dllimport/dllexport linkage or + // external globals. + return GV->hasDefinitiveInitializer(); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + // Handle a constantexpr gep. + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0)) && + cast<GEPOperator>(CE)->isInBounds()) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + // Do not allow weak/linkonce/dllimport/dllexport linkage or + // external globals. + if (!GV->hasDefinitiveInitializer()) + return false; + + // The first index must be zero. + ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin())); + if (!CI || !CI->isZero()) return false; + + // The remaining indices must be compile-time known integers within the + // notional bounds of the corresponding static array types. + if (!CE->isGEPWithNoNotionalOverIndexing()) + return false; + + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + return false; +} + +/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global +/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. +/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. +static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, + ConstantExpr *Addr, unsigned OpNo) { + // Base case of the recursion. + if (OpNo == Addr->getNumOperands()) { + assert(Val->getType() == Init->getType() && "Type mismatch!"); + return Val; + } + + std::vector<Constant*> Elts; + if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { + + // Break up the constant into its elements. 
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { + for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); + } else if (isa<ConstantAggregateZero>(Init)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(Constant::getNullValue(STy->getElementType(i))); + } else if (isa<UndefValue>(Init)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(UndefValue::get(STy->getElementType(i))); + } else { + llvm_unreachable("This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + } + + // Replace the element that we are supposed to. + ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); + unsigned Idx = CU->getZExtValue(); + assert(Idx < STy->getNumElements() && "Struct index out of range!"); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); + + // Return the modified struct. + return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), + STy->isPacked()); + } else { + ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); + const SequentialType *InitTy = cast<SequentialType>(Init->getType()); + + uint64_t NumElts; + if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = cast<VectorType>(InitTy)->getNumElements(); + + + // Break up the array into elements. + if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); + } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { + for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); + } else if (isa<ConstantAggregateZero>(Init)) { + Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); + } else { + assert(isa<UndefValue>(Init) && "This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); + } + + assert(CI->getZExtValue() < NumElts); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + + if (Init->getType()->isArrayTy()) + return ConstantArray::get(cast<ArrayType>(InitTy), Elts); + else + return ConstantVector::get(&Elts[0], Elts.size()); + } +} + +/// CommitValueTo - We have decided that Addr (which satisfies the predicate +/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. +static void CommitValueTo(Constant *Val, Constant *Addr) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { + assert(GV->hasInitializer()); + GV->setInitializer(Val); + return; + } + + ConstantExpr *CE = cast<ConstantExpr>(Addr); + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); +} + +/// ComputeLoadResult - Return the value that would be computed by a load from +/// P after the stores reflected by 'memory' have been performed. If we can't +/// decide, return null. +static Constant *ComputeLoadResult(Constant *P, + const DenseMap<Constant*, Constant*> &Memory) { + // If this memory location has been recently stored, use the stored value: it + // is the most up-to-date. + DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); + if (I != Memory.end()) return I->second; + + // Access it. 
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { + if (GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + return 0; + } + + // Handle a constantexpr getelementptr. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0))) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + if (GV->hasDefinitiveInitializer()) + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + + return 0; // don't know how to evaluate. +} + +/// EvaluateFunction - Evaluate a call to function F, returning true if +/// successful, false if we can't evaluate it. ActualArgs contains the formal +/// arguments for the function. +static bool EvaluateFunction(Function *F, Constant *&RetVal, + const SmallVectorImpl<Constant*> &ActualArgs, + std::vector<Function*> &CallStack, + DenseMap<Constant*, Constant*> &MutatedMemory, + std::vector<GlobalVariable*> &AllocaTmps) { + // Check to see if this function is already executing (recursion). If so, + // bail out. TODO: we might want to accept limited recursion. + if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) + return false; + + CallStack.push_back(F); + + /// Values - As we compute SSA register values, we store their contents here. + DenseMap<Value*, Constant*> Values; + + // Initialize arguments to the incoming values specified. + unsigned ArgNo = 0; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; + ++AI, ++ArgNo) + Values[AI] = ActualArgs[ArgNo]; + + /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, + /// we can only evaluate any one basic block at most once. This set keeps + /// track of what we have executed so we can detect recursive cases etc. + SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; + + // CurInst - The current instruction we're evaluating. + BasicBlock::iterator CurInst = F->begin()->begin(); + + // This is the main evaluation loop. + while (1) { + Constant *InstResult = 0; + + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { + if (SI->isVolatile()) return false; // no volatile accesses. + Constant *Ptr = getVal(Values, SI->getOperand(1)); + if (!isSimpleEnoughPointerToCommit(Ptr)) + // If this is too complex for us to commit, reject it. + return false; + Constant *Val = getVal(Values, SI->getOperand(0)); + MutatedMemory[Ptr] = Val; + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { + InstResult = ConstantExpr::get(BO->getOpcode(), + getVal(Values, BO->getOperand(0)), + getVal(Values, BO->getOperand(1))); + } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { + InstResult = ConstantExpr::getCompare(CI->getPredicate(), + getVal(Values, CI->getOperand(0)), + getVal(Values, CI->getOperand(1))); + } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { + InstResult = ConstantExpr::getCast(CI->getOpcode(), + getVal(Values, CI->getOperand(0)), + CI->getType()); + } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { + InstResult = + ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + getVal(Values, SI->getOperand(1)), + getVal(Values, SI->getOperand(2))); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { + Constant *P = getVal(Values, GEP->getOperand(0)); + SmallVector<Constant*, 8> GEPOps; + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); + i != e; ++i) + GEPOps.push_back(getVal(Values, *i)); + InstResult = cast<GEPOperator>(GEP)->isInBounds() ? 
+ ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : + ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { + if (LI->isVolatile()) return false; // no volatile accesses. + InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), + MutatedMemory); + if (InstResult == 0) return false; // Could not evaluate load. + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { + if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. + const Type *Ty = AI->getType()->getElementType(); + AllocaTmps.push_back(new GlobalVariable(Ty, false, + GlobalValue::InternalLinkage, + UndefValue::get(Ty), + AI->getName())); + InstResult = AllocaTmps.back(); + } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { + + // Debug info can safely be ignored here. + if (isa<DbgInfoIntrinsic>(CI)) { + ++CurInst; + continue; + } + + // Cannot handle inline asm. + if (isa<InlineAsm>(CI->getCalledValue())) return false; + + // Resolve function pointers. + Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue())); + if (!Callee) return false; // Cannot resolve. + + SmallVector<Constant*, 8> Formals; + for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); + i != e; ++i) + Formals.push_back(getVal(Values, *i)); + + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(Callee, Formals.data(), + Formals.size())) { + InstResult = C; + } else { + return false; + } + } else { + if (Callee->getFunctionType()->isVarArg()) + return false; + + Constant *RetVal; + // Execute the call, if successful, use the return value. + if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, + MutatedMemory, AllocaTmps)) + return false; + InstResult = RetVal; + } + } else if (isa<TerminatorInst>(CurInst)) { + BasicBlock *NewBB = 0; + if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { + if (BI->isUnconditional()) { + NewBB = BI->getSuccessor(0); + } else { + ConstantInt *Cond = + dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); + if (!Cond) return false; // Cannot determine. + + NewBB = BI->getSuccessor(!Cond->getZExtValue()); + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { + ConstantInt *Val = + dyn_cast<ConstantInt>(getVal(Values, SI->getCondition())); + if (!Val) return false; // Cannot determine. + NewBB = SI->getSuccessor(SI->findCaseValue(Val)); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { + Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts(); + if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) + NewBB = BA->getBasicBlock(); + else + return false; // Cannot determine. + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { + if (RI->getNumOperands()) + RetVal = getVal(Values, RI->getOperand(0)); + + CallStack.pop_back(); // return from fn. + return true; // We succeeded at evaluating this ctor! + } else { + // invoke, unwind, unreachable. + return false; // Cannot handle this terminator. + } + + // Okay, we succeeded in evaluating this control flow. See if we have + // executed the new block before. If so, we have a looping function, + // which we cannot evaluate in reasonable time. + if (!ExecutedBlocks.insert(NewBB)) + return false; // looped! + + // Okay, we have never been in this block before. Check to see if there + // are any PHI nodes. If so, evaluate them with information about where + // we came from. 
+ BasicBlock *OldBB = CurInst->getParent(); + CurInst = NewBB->begin(); + PHINode *PN; + for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) + Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB)); + + // Do NOT increment CurInst. We know that the terminator had no value. + continue; + } else { + // Did not know how to evaluate this! + return false; + } + + if (!CurInst->use_empty()) + Values[CurInst] = InstResult; + + // Advance program counter. + ++CurInst; + } +} + +/// EvaluateStaticConstructor - Evaluate static constructors in the function, if +/// we can. Return true if we can, false otherwise. +static bool EvaluateStaticConstructor(Function *F) { + /// MutatedMemory - For each store we execute, we update this map. Loads + /// check this to get the most up-to-date value. If evaluation is successful, + /// this state is committed to the process. + DenseMap<Constant*, Constant*> MutatedMemory; + + /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable + /// to represent its body. This vector is needed so we can delete the + /// temporary globals when we are done. + std::vector<GlobalVariable*> AllocaTmps; + + /// CallStack - This is used to detect recursion. In pathological situations + /// we could hit exponential behavior, but at least there is nothing + /// unbounded. + std::vector<Function*> CallStack; + + // Call the function. + Constant *RetValDummy; + bool EvalSuccess = EvaluateFunction(F, RetValDummy, + SmallVector<Constant*, 0>(), CallStack, + MutatedMemory, AllocaTmps); + if (EvalSuccess) { + // We succeeded at evaluation: commit the result. + DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + << F->getName() << "' to " << MutatedMemory.size() + << " stores.\n"); + for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), + E = MutatedMemory.end(); I != E; ++I) + CommitValueTo(I->second, I->first); + } + + // At this point, we are done interpreting. If we created any 'alloca' + // temporaries, release them now. + while (!AllocaTmps.empty()) { + GlobalVariable *Tmp = AllocaTmps.back(); + AllocaTmps.pop_back(); + + // If there are still users of the alloca, the program is doing something + // silly, e.g. storing the address of the alloca somewhere and using it + // later. Since this is undefined, we'll just make it be null. + if (!Tmp->use_empty()) + Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); + delete Tmp; + } + + return EvalSuccess; +} + + + +/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. +/// Return true if anything changed. +bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { + std::vector<Function*> Ctors = ParseGlobalCtors(GCL); + bool MadeChange = false; + if (Ctors.empty()) return false; + + // Loop over global ctors, optimizing them when we can. + for (unsigned i = 0; i != Ctors.size(); ++i) { + Function *F = Ctors[i]; + // Found a null terminator in the middle of the list, prune off the rest of + // the list. + if (F == 0) { + if (i != Ctors.size()-1) { + Ctors.resize(i+1); + MadeChange = true; + } + break; + } + + // We cannot simplify external ctor functions. + if (F->empty()) continue; + + // If we can evaluate the ctor at compile time, do. 
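
The constructors EvaluateStaticConstructor can fold away are straight-line and non-looping, with loads and stores that resolve to globals whose initializers are known. A hedged sketch, with hypothetical names, of a foldable ctor:

  static int A;
  static int B;

  static int run_init() {            // reached via an llvm.global_ctors entry
    A = 7;                           // store recorded in MutatedMemory
    B = A + 1;                       // the load of A sees the 7 stored above
    return 0;
  }
  static int InitDone = run_init();  // dynamic initializer, i.e. a static ctor

  // On success the stores are committed to the initializers (A becomes 7,
  // B becomes 8, InitDone becomes 0) and the ctor is removed from the
  // llvm.global_ctors list.
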
+ if (EvaluateStaticConstructor(F)) { + Ctors.erase(Ctors.begin()+i); + MadeChange = true; + --i; + ++NumCtorsEvaluated; + continue; + } + } + + if (!MadeChange) return false; + + GCL = InstallGlobalCtors(GCL, Ctors); + return true; +} + +bool GlobalOpt::OptimizeGlobalAliases(Module &M) { + bool Changed = false; + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E;) { + Module::alias_iterator J = I++; + // Aliases without names cannot be referenced outside this module. + if (!J->hasName() && !J->isDeclaration()) + J->setLinkage(GlobalValue::InternalLinkage); + // If the aliasee may change at link time, nothing can be done - bail out. + if (J->mayBeOverridden()) + continue; + + Constant *Aliasee = J->getAliasee(); + GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); + Target->removeDeadConstantUsers(); + bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); + + // Make all users of the alias use the aliasee instead. + if (!J->use_empty()) { + J->replaceAllUsesWith(Aliasee); + ++NumAliasesResolved; + Changed = true; + } + + // If the alias is externally visible, we may still be able to simplify it. + if (!J->hasLocalLinkage()) { + // If the aliasee has internal linkage, give it the name and linkage + // of the alias, and delete the alias. This turns: + // define internal ... @f(...) + // @a = alias ... @f + // into: + // define ... @a(...) + if (!Target->hasLocalLinkage()) + continue; + + // Do not perform the transform if multiple aliases potentially target the + // aliasee. This check also ensures that it is safe to replace the section + // and other attributes of the aliasee with those of the alias. + if (!hasOneUse) + continue; + + // Give the aliasee the name, linkage and other attributes of the alias. + Target->takeName(J); + Target->setLinkage(J->getLinkage()); + Target->GlobalValue::copyAttributesFrom(J); + } + + // Delete the alias. + M.getAliasList().erase(J); + ++NumAliasesRemoved; + Changed = true; + } + + return Changed; +} + +bool GlobalOpt::runOnModule(Module &M) { + bool Changed = false; + + // Try to find the llvm.globalctors list. + GlobalVariable *GlobalCtors = FindGlobalCtors(M); + + bool LocalChange = true; + while (LocalChange) { + LocalChange = false; + + // Delete functions that are trivially dead, ccc -> fastcc + LocalChange |= OptimizeFunctions(M); + + // Optimize global_ctors list. + if (GlobalCtors) + LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); + + // Optimize non-address-taken globals. + LocalChange |= OptimizeGlobalVars(M); + + // Resolve aliases, when possible. + LocalChange |= OptimizeGlobalAliases(M); + Changed |= LocalChange; + } + + // TODO: Move all global ctors functions to the end of the module for code + // layout. + + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp new file mode 100644 index 0000000..df2456f --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -0,0 +1,276 @@ +//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements an _extremely_ simple interprocedural constant +// propagation pass. 
It could certainly be improved in many different ways, +// like using a worklist. This pass makes arguments dead, but does not remove +// them. The existing dead argument elimination pass should be run after this +// to clean up the mess. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ipconstprop" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/CallSite.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +STATISTIC(NumArgumentsProped, "Number of args turned into constants"); +STATISTIC(NumReturnValProped, "Number of return values turned into constants"); + +namespace { + /// IPCP - The interprocedural constant propagation pass + /// + struct IPCP : public ModulePass { + static char ID; // Pass identification, replacement for typeid + IPCP() : ModulePass(&ID) {} + + bool runOnModule(Module &M); + private: + bool PropagateConstantsIntoArguments(Function &F); + bool PropagateConstantReturn(Function &F); + }; +} + +char IPCP::ID = 0; +static RegisterPass<IPCP> +X("ipconstprop", "Interprocedural constant propagation"); + +ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } + +bool IPCP::runOnModule(Module &M) { + bool Changed = false; + bool LocalChange = true; + + // FIXME: instead of using smart algorithms, we just iterate until we stop + // making changes. + while (LocalChange) { + LocalChange = false; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) { + // Delete any klingons. + I->removeDeadConstantUsers(); + if (I->hasLocalLinkage()) + LocalChange |= PropagateConstantsIntoArguments(*I); + Changed |= PropagateConstantReturn(*I); + } + Changed |= LocalChange; + } + return Changed; +} + +/// PropagateConstantsIntoArguments - Look at all uses of the specified +/// function. If all uses are direct call sites, and all pass a particular +/// constant in for an argument, propagate that constant in as the argument. +/// +bool IPCP::PropagateConstantsIntoArguments(Function &F) { + if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit. + + // For each argument, keep track of its constant value and whether it is a + // constant or not. The bool is driven to true when found to be non-constant. + SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants; + ArgumentConstants.resize(F.arg_size()); + + unsigned NumNonconstant = 0; + for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { + // Ignore blockaddress uses. + if (isa<BlockAddress>(*UI)) continue; + + // Used by a non-instruction, or not the callee of a function, do not + // transform. + if (!isa<CallInst>(*UI) && !isa<InvokeInst>(*UI)) + return false; + + CallSite CS = CallSite::get(cast<Instruction>(*UI)); + if (!CS.isCallee(UI)) + return false; + + // Check out all of the potentially constant arguments. Note that we don't + // inspect varargs here. + CallSite::arg_iterator AI = CS.arg_begin(); + Function::arg_iterator Arg = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; + ++i, ++AI, ++Arg) { + + // If this argument is known non-constant, ignore it. 
+ if (ArgumentConstants[i].second) + continue; + + Constant *C = dyn_cast<Constant>(*AI); + if (C && ArgumentConstants[i].first == 0) { + ArgumentConstants[i].first = C; // First constant seen. + } else if (C && ArgumentConstants[i].first == C) { + // Still the constant value we think it is. + } else if (*AI == &*Arg) { + // Ignore recursive calls passing argument down. + } else { + // Argument became non-constant. If all arguments are non-constant now, + // give up on this function. + if (++NumNonconstant == ArgumentConstants.size()) + return false; + ArgumentConstants[i].second = true; + } + } + } + + // If we got to this point, there is a constant argument! + assert(NumNonconstant != ArgumentConstants.size()); + bool MadeChange = false; + Function::arg_iterator AI = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { + // Do we have a constant argument? + if (ArgumentConstants[i].second || AI->use_empty() || + (AI->hasByValAttr() && !F.onlyReadsMemory())) + continue; + + Value *V = ArgumentConstants[i].first; + if (V == 0) V = UndefValue::get(AI->getType()); + AI->replaceAllUsesWith(V); + ++NumArgumentsProped; + MadeChange = true; + } + return MadeChange; +} + + +// Check to see if this function returns one or more constants. If so, replace +// all callers that use those return values with the constant value. This will +// leave in the actual return values and instructions, but deadargelim will +// clean that up. +// +// Additionally if a function always returns one of its arguments directly, +// callers will be updated to use the value they pass in directly instead of +// using the return value. +bool IPCP::PropagateConstantReturn(Function &F) { + if (F.getReturnType()->isVoidTy()) + return false; // No return value. + + // If this function could be overridden later in the link stage, we can't + // propagate information about its results into callers. + if (F.mayBeOverridden()) + return false; + + // Check to see if this function returns a constant. + SmallVector<Value *,4> RetVals; + const StructType *STy = dyn_cast<StructType>(F.getReturnType()); + if (STy) + for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) + RetVals.push_back(UndefValue::get(STy->getElementType(i))); + else + RetVals.push_back(UndefValue::get(F.getReturnType())); + + unsigned NumNonConstant = 0; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + for (unsigned i = 0, e = RetVals.size(); i != e; ++i) { + // Already found conflicting return values? + Value *RV = RetVals[i]; + if (!RV) + continue; + + // Find the returned value + Value *V; + if (!STy) + V = RI->getOperand(i); + else + V = FindInsertedValue(RI->getOperand(0), i); + + if (V) { + // Ignore undefs, we can change them into anything + if (isa<UndefValue>(V)) + continue; + + // Try to see if all the rets return the same constant or argument. + if (isa<Constant>(V) || isa<Argument>(V)) { + if (isa<UndefValue>(RV)) { + // No value found yet? Try the current one. + RetVals[i] = V; + continue; + } + // Returning the same value? Good. + if (RV == V) + continue; + } + } + // Different or no known return value? Don't propagate this return + // value. + RetVals[i] = 0; + // All values non constant? Stop looking. + if (++NumNonConstant == RetVals.size()) + return false; + } + } + + // If we got here, the function returns at least one constant value. 
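
Both directions of propagation are easiest to see on a small example; a hedged sketch with hypothetical names of the situations the two routines target:

  // Every direct call passes the same constant for 'limit', and every
  // return statement returns the same literal constant.
  static int clamp_mode(int limit) {
    if (limit > 0)
      return 7;
    return 7;
  }
  int f() { return clamp_mode(64); }
  int g() { return clamp_mode(64); }

  // PropagateConstantsIntoArguments replaces uses of 'limit' inside
  // clamp_mode with 64; PropagateConstantReturn replaces each caller's use
  // of the call result with 7. The now-dead argument and return value are
  // left for the dead argument elimination pass to clean up.
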
Loop + // over all users, replacing any uses of the return value with the returned + // constant. + bool MadeChange = false; + for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { + CallSite CS = CallSite::get(*UI); + Instruction* Call = CS.getInstruction(); + + // Not a call instruction or a call instruction that's not calling F + // directly? + if (!Call || !CS.isCallee(UI)) + continue; + + // Call result not used? + if (Call->use_empty()) + continue; + + MadeChange = true; + + if (STy == 0) { + Value* New = RetVals[0]; + if (Argument *A = dyn_cast<Argument>(New)) + // Was an argument returned? Then find the corresponding argument in + // the call instruction and use that. + New = CS.getArgument(A->getArgNo()); + Call->replaceAllUsesWith(New); + continue; + } + + for (Value::use_iterator I = Call->use_begin(), E = Call->use_end(); + I != E;) { + Instruction *Ins = cast<Instruction>(*I); + + // Increment now, so we can remove the use + ++I; + + // Find the index of the retval to replace with + int index = -1; + if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins)) + if (EV->hasIndices()) + index = *EV->idx_begin(); + + // If this use uses a specific return value, and we have a replacement, + // replace it. + if (index != -1) { + Value *New = RetVals[index]; + if (New) { + if (Argument *A = dyn_cast<Argument>(New)) + // Was an argument returned? Then find the corresponding argument in + // the call instruction and use that. + New = CS.getArgument(A->getArgNo()); + Ins->replaceAllUsesWith(New); + Ins->eraseFromParent(); + } + } + } + } + + if (MadeChange) ++NumReturnValProped; + return MadeChange; +} diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp new file mode 100644 index 0000000..340b70e --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -0,0 +1,84 @@ +//===-- Scalar.cpp --------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the C bindings for libLLVMIPO.a, which implements +// several transformations over the LLVM intermediate representation. 
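
A short sketch of how these bindings are typically driven from client code. LLVMCreatePassManager, LLVMRunPassManager and LLVMDisposePassManager come from the core llvm-c API; the module is assumed to have been built or parsed elsewhere.

  #include "llvm-c/Core.h"
  #include "llvm-c/Transforms/IPO.h"

  void runIPOPasses(LLVMModuleRef Mod) {
    LLVMPassManagerRef PM = LLVMCreatePassManager();
    LLVMAddGlobalOptimizerPass(PM);          // globalopt
    LLVMAddIPConstantPropagationPass(PM);    // ipconstprop
    LLVMAddFunctionInliningPass(PM);         // inline
    LLVMRunPassManager(PM, Mod);             // returns true if the IR changed
    LLVMDisposePassManager(PM);
  }
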
+// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Transforms/IPO.h" +#include "llvm/PassManager.h" +#include "llvm/Transforms/IPO.h" + +using namespace llvm; + +void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createArgumentPromotionPass()); +} + +void LLVMAddConstantMergePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createConstantMergePass()); +} + +void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDeadArgEliminationPass()); +} + +void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDeadTypeEliminationPass()); +} + +void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createFunctionAttrsPass()); +} + +void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createFunctionInliningPass()); +} + +void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGlobalDCEPass()); +} + +void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGlobalOptimizerPass()); +} + +void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIPConstantPropagationPass()); +} + +void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLowerSetJmpPass()); +} + +void LLVMAddPruneEHPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createPruneEHPass()); +} + +void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIPSCCPPass()); +} + +void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { + unwrap(PM)->add(createInternalizePass(AllButMain != 0)); +} + + +void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) { + // FIXME: Remove in LLVM 3.0. +} + +void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createStripDeadPrototypesPass()); +} + +void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createStripSymbolsPass()); +} diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp new file mode 100644 index 0000000..8e312e7 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp @@ -0,0 +1,80 @@ +//===- InlineAlways.cpp - Code to inline always_inline functions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a custom inliner that handles only functions that +// are marked as "always inline". +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline" +#include "llvm/CallingConv.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/ADT/SmallPtrSet.h" + +using namespace llvm; + +namespace { + + // AlwaysInliner only inlines functions that are mark as "always inline". + class AlwaysInliner : public Inliner { + // Functions that are never inlined + SmallPtrSet<const Function*, 16> NeverInline; + InlineCostAnalyzer CA; + public: + // Use extremely low threshold. 
+ AlwaysInliner() : Inliner(&ID, -2000000000) {} + static char ID; // Pass identification, replacement for typeid + InlineCost getInlineCost(CallSite CS) { + return CA.getInlineCost(CS, NeverInline); + } + float getInlineFudgeFactor(CallSite CS) { + return CA.getInlineFudgeFactor(CS); + } + void resetCachedCostInfo(Function *Caller) { + CA.resetCachedCostInfo(Caller); + } + void growCachedCostInfo(Function* Caller, Function* Callee) { + CA.growCachedCostInfo(Caller, Callee); + } + virtual bool doFinalization(CallGraph &CG) { + return removeDeadFunctions(CG, &NeverInline); + } + virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } + }; +} + +char AlwaysInliner::ID = 0; +static RegisterPass<AlwaysInliner> +X("always-inline", "Inliner for always_inline functions"); + +Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); } + +// doInitialization - Initializes the vector of functions that have not +// been annotated with the "always inline" attribute. +bool AlwaysInliner::doInitialization(CallGraph &CG) { + Module &M = CG.getModule(); + + for (Module::iterator I = M.begin(), E = M.end(); + I != E; ++I) + if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline)) + NeverInline.insert(I); + + return false; +} diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp new file mode 100644 index 0000000..74b4a1c --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -0,0 +1,111 @@ +//===- InlineSimple.cpp - Code to perform simple function inlining --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bottom-up inlining of functions into callees. 
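
Bottom-up refers to the call-graph SCC order: callees are visited before their callers, so the deepest functions are considered first and already-inlined code is taken into account higher up. A small sketch with hypothetical names:

  static int leaf(int x) { return x + 1; }        // visited first
  static int mid(int x)  { return leaf(x) * 2; }  // leaf considered here first
  int top(int x)         { return mid(x) - 3; }   // then mid, with leaf's code
                                                  // already merged in
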
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline" +#include "llvm/CallingConv.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/ADT/SmallPtrSet.h" + +using namespace llvm; + +namespace { + + class SimpleInliner : public Inliner { + // Functions that are never inlined + SmallPtrSet<const Function*, 16> NeverInline; + InlineCostAnalyzer CA; + public: + SimpleInliner() : Inliner(&ID) {} + SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {} + static char ID; // Pass identification, replacement for typeid + InlineCost getInlineCost(CallSite CS) { + return CA.getInlineCost(CS, NeverInline); + } + float getInlineFudgeFactor(CallSite CS) { + return CA.getInlineFudgeFactor(CS); + } + void resetCachedCostInfo(Function *Caller) { + CA.resetCachedCostInfo(Caller); + } + void growCachedCostInfo(Function* Caller, Function* Callee) { + CA.growCachedCostInfo(Caller, Callee); + } + virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } + }; +} + +char SimpleInliner::ID = 0; +static RegisterPass<SimpleInliner> +X("inline", "Function Integration/Inlining"); + +Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } + +Pass *llvm::createFunctionInliningPass(int Threshold) { + return new SimpleInliner(Threshold); +} + +// doInitialization - Initializes the vector of functions that have been +// annotated with the noinline attribute. +bool SimpleInliner::doInitialization(CallGraph &CG) { + + Module &M = CG.getModule(); + + for (Module::iterator I = M.begin(), E = M.end(); + I != E; ++I) + if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline)) + NeverInline.insert(I); + + // Get llvm.noinline + GlobalVariable *GV = M.getNamedGlobal("llvm.noinline"); + + if (GV == 0) + return false; + + // Don't crash on invalid code + if (!GV->hasDefinitiveInitializer()) + return false; + + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + + if (InitList == 0) + return false; + + // Iterate over each element and add to the NeverInline set + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + + // Get Source + const Constant *Elt = InitList->getOperand(i); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt)) + if (CE->getOpcode() == Instruction::BitCast) + Elt = CE->getOperand(0); + + // Insert into set of functions to never inline + if (const Function *F = dyn_cast<Function>(Elt)) + NeverInline.insert(F); + } + + return false; +} + diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp new file mode 100644 index 0000000..b785bb0 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -0,0 +1,548 @@ +//===- Inliner.cpp - Code common to all inliners --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the mechanics required to implement inlining without +// missing any calls and updating the call graph. 
The decisions of which calls +// are profitable to inline are implemented elsewhere. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include <set> +using namespace llvm; + +STATISTIC(NumInlined, "Number of functions inlined"); +STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); +STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); +STATISTIC(NumMergedAllocas, "Number of allocas merged together"); + +static cl::opt<int> +InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, + cl::desc("Control the amount of inlining to perform (default = 225)")); + +static cl::opt<int> +HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), + cl::desc("Threshold for inlining functions with inline hint")); + +// Threshold to use when optsize is specified (and there is no -inline-limit). +const int OptSizeThreshold = 75; + +Inliner::Inliner(void *ID) + : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} + +Inliner::Inliner(void *ID, int Threshold) + : CallGraphSCCPass(ID), InlineThreshold(Threshold) {} + +/// getAnalysisUsage - For this class, we declare that we require and preserve +/// the call graph. If the derived class implements this method, it should +/// always explicitly call the implementation here. +void Inliner::getAnalysisUsage(AnalysisUsage &Info) const { + CallGraphSCCPass::getAnalysisUsage(Info); +} + + +typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> > +InlinedArrayAllocasTy; + +/// InlineCallIfPossible - If it is possible to inline the specified call site, +/// do so and update the CallGraph for this operation. +/// +/// This function also does some basic book-keeping to update the IR. The +/// InlinedArrayAllocas map keeps track of any allocas that are already +/// available from other functions inlined into the caller. If we are able to +/// inline this call site we attempt to reuse already available allocas or add +/// any new allocas to the set if not possible. +static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas) { + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + + // Try to inline the function. Get the list of static allocas that were + // inlined. + if (!InlineFunction(CS, IFI)) + return false; + + // If the inlined function had a higher stack protection level than the + // calling function, then bump up the caller's stack protection level. + if (Callee->hasFnAttr(Attribute::StackProtectReq)) + Caller->addFnAttr(Attribute::StackProtectReq); + else if (Callee->hasFnAttr(Attribute::StackProtect) && + !Caller->hasFnAttr(Attribute::StackProtectReq)) + Caller->addFnAttr(Attribute::StackProtect); + + + // Look at all of the allocas that we inlined through this call site. 
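
A concrete shape of the situation handled here, sketched with hypothetical names; the remainder of this comment explains the reasoning and its limits.

  static inline int sum4(const int *p) {
    int tmp[4];                         // becomes a static array alloca in the
    for (int i = 0; i != 4; ++i)        // caller each time sum4 is inlined
      tmp[i] = p[i] * 2;
    return tmp[0] + tmp[1] + tmp[2] + tmp[3];
  }

  int caller(const int *a, const int *b) {
    int x = sum4(a);                    // first inlined copy brings one array
    int y = sum4(b);                    // second copy's array has a disjoint
    return x + y;                       // lifetime and can share the same slot
  }
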
If we + // have already inlined other allocas through other calls into this function, + // then we know that they have disjoint lifetimes and that we can merge them. + // + // There are many heuristics possible for merging these allocas, and the + // different options have different tradeoffs. One thing that we *really* + // don't want to hurt is SRoA: once inlining happens, often allocas are no + // longer address taken and so they can be promoted. + // + // Our "solution" for that is to only merge allocas whose outermost type is an + // array type. These are usually not promoted because someone is using a + // variable index into them. These are also often the most important ones to + // merge. + // + // A better solution would be to have real memory lifetime markers in the IR + // and not have the inliner do any merging of allocas at all. This would + // allow the backend to do proper stack slot coloring of all allocas that + // *actually make it to the backend*, which is really what we want. + // + // Because we don't have this information, we do this simple and useful hack. + // + SmallPtrSet<AllocaInst*, 16> UsedAllocas; + + // Loop over all the allocas we have so far and see if they can be merged with + // a previously inlined alloca. If not, remember that we had it. + for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); + AllocaNo != e; ++AllocaNo) { + AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; + + // Don't bother trying to merge array allocations (they will usually be + // canonicalized to be an allocation *of* an array), or allocations whose + // type is not itself an array (because we're afraid of pessimizing SRoA). + const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); + if (ATy == 0 || AI->isArrayAllocation()) + continue; + + // Get the list of all available allocas for this array type. + std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy]; + + // Loop over the allocas in AllocasForType to see if we can reuse one. Note + // that we have to be careful not to reuse the same "available" alloca for + // multiple different allocas that we just inlined, we use the 'UsedAllocas' + // set to keep track of which "available" allocas are being used by this + // function. Also, AllocasForType can be empty of course! + bool MergedAwayAlloca = false; + for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { + AllocaInst *AvailableAlloca = AllocasForType[i]; + + // The available alloca has to be in the right function, not in some other + // function in this SCC. + if (AvailableAlloca->getParent() != AI->getParent()) + continue; + + // If the inlined function already uses this alloca then we can't reuse + // it. + if (!UsedAllocas.insert(AvailableAlloca)) + continue; + + // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare + // success! + DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI); + + AI->replaceAllUsesWith(AvailableAlloca); + AI->eraseFromParent(); + MergedAwayAlloca = true; + ++NumMergedAllocas; + break; + } + + // If we already nuked the alloca, we're done with it. + if (MergedAwayAlloca) + continue; + + // If we were unable to merge away the alloca either because there are no + // allocas of the right type available or because we reused them all + // already, remember that this alloca came from an inlined function and mark + // it used so we don't reuse it for other allocas from this inline + // operation. 
+ AllocasForType.push_back(AI); + UsedAllocas.insert(AI); + } + + return true; +} + +unsigned Inliner::getInlineThreshold(CallSite CS) const { + int thres = InlineThreshold; + + // Listen to optsize when -inline-limit is not given. + Function *Caller = CS.getCaller(); + if (Caller && !Caller->isDeclaration() && + Caller->hasFnAttr(Attribute::OptimizeForSize) && + InlineLimit.getNumOccurrences() == 0) + thres = OptSizeThreshold; + + // Listen to inlinehint when it would increase the threshold. + Function *Callee = CS.getCalledFunction(); + if (HintThreshold > thres && Callee && !Callee->isDeclaration() && + Callee->hasFnAttr(Attribute::InlineHint)) + thres = HintThreshold; + + return thres; +} + +/// shouldInline - Return true if the inliner should attempt to inline +/// at the given CallSite. +bool Inliner::shouldInline(CallSite CS) { + InlineCost IC = getInlineCost(CS); + + if (IC.isAlways()) { + DEBUG(dbgs() << " Inlining: cost=always" + << ", Call: " << *CS.getInstruction() << "\n"); + return true; + } + + if (IC.isNever()) { + DEBUG(dbgs() << " NOT Inlining: cost=never" + << ", Call: " << *CS.getInstruction() << "\n"); + return false; + } + + int Cost = IC.getValue(); + Function *Caller = CS.getCaller(); + int CurrentThreshold = getInlineThreshold(CS); + float FudgeFactor = getInlineFudgeFactor(CS); + int AdjThreshold = (int)(CurrentThreshold * FudgeFactor); + if (Cost >= AdjThreshold) { + DEBUG(dbgs() << " NOT Inlining: cost=" << Cost + << ", thres=" << AdjThreshold + << ", Call: " << *CS.getInstruction() << "\n"); + return false; + } + + // Try to detect the case where the current inlining candidate caller + // (call it B) is a static function and is an inlining candidate elsewhere, + // and the current candidate callee (call it C) is large enough that + // inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B + // into its callers. + if (Caller->hasLocalLinkage()) { + int TotalSecondaryCost = 0; + bool outerCallsFound = false; + bool allOuterCallsWillBeInlined = true; + bool someOuterCallWouldNotBeInlined = false; + for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); + I != E; ++I) { + CallSite CS2 = CallSite::get(*I); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. + if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller) + continue; + + InlineCost IC2 = getInlineCost(CS2); + if (IC2.isNever()) + allOuterCallsWillBeInlined = false; + if (IC2.isAlways() || IC2.isNever()) + continue; + + outerCallsFound = true; + int Cost2 = IC2.getValue(); + int CurrentThreshold2 = getInlineThreshold(CS2); + float FudgeFactor2 = getInlineFudgeFactor(CS2); + + if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) + allOuterCallsWillBeInlined = false; + + // See if we have this case. We subtract off the penalty + // for the call instruction, which we would be deleting. + if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && + Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= + (int)(CurrentThreshold2 * FudgeFactor2)) { + someOuterCallWouldNotBeInlined = true; + TotalSecondaryCost += Cost2; + } + } + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. 
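
The shape of the case this heuristic tries to detect, with hypothetical names: inlining a large callee into a small static wrapper can make the wrapper too expensive to inline into its own callers, which may be the worse trade overall.

  // Stands in for a callee whose real body is large, close to the threshold.
  static long big_callee(const long *p, long n) {
    long s = 0;
    for (long i = 0; i != n; ++i) s += p[i];
    return s;
  }
  static long wrapper(const long *p, long n) {   // cheap to inline as-is
    return big_callee(p, n) + 1;
  }
  long f(const long *p, long n) { return wrapper(p, n); }
  long g(const long *p, long n) { return wrapper(p, n); }

  // Inlining big_callee into wrapper first could push wrapper over the
  // threshold at the f and g call sites; shouldInline compares that
  // secondary cost against the direct benefit before deciding.
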
+ if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end()) + TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; + + if (outerCallsFound && someOuterCallWouldNotBeInlined && + TotalSecondaryCost < Cost) { + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << + " Cost = " << Cost << + ", outer Cost = " << TotalSecondaryCost << '\n'); + return false; + } + } + + DEBUG(dbgs() << " Inlining: cost=" << Cost + << ", thres=" << AdjThreshold + << ", Call: " << *CS.getInstruction() << '\n'); + return true; +} + +/// InlineHistoryIncludes - Return true if the specified inline history ID +/// indicates an inline history that includes the specified function. +static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, + const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { + while (InlineHistoryID != -1) { + assert(unsigned(InlineHistoryID) < InlineHistory.size() && + "Invalid inline history ID"); + if (InlineHistory[InlineHistoryID].first == F) + return true; + InlineHistoryID = InlineHistory[InlineHistoryID].second; + } + return false; +} + + +bool Inliner::runOnSCC(CallGraphSCC &SCC) { + CallGraph &CG = getAnalysis<CallGraph>(); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + + SmallPtrSet<Function*, 8> SCCFunctions; + DEBUG(dbgs() << "Inliner visiting SCC:"); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + if (F) SCCFunctions.insert(F); + DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); + } + + // Scan through and identify all call sites ahead of time so that we only + // inline call sites in the original functions, not call sites that result + // from inlining other functions. + SmallVector<std::pair<CallSite, int>, 16> CallSites; + + // When inlining a callee produces new call sites, we want to keep track of + // the fact that they were inlined from the callee. This allows us to avoid + // infinite inlining in some obscure cases. To represent this, we use an + // index into the InlineHistory vector. + SmallVector<std::pair<Function*, int>, 8> InlineHistory; + + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + if (!F) continue; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS = CallSite::get(I); + // If this isn't a call, or it is a call to an intrinsic, it can + // never be inlined. + if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I)) + continue; + + // If this is a direct call to an external function, we can never inline + // it. If it is an indirect call, inlining may resolve it to be a + // direct call, so we keep it. + if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) + continue; + + CallSites.push_back(std::make_pair(CS, -1)); + } + } + + DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); + + // If there are no calls in this function, exit early. + if (CallSites.empty()) + return false; + + // Now that we have all of the call sites, move the ones to functions in the + // current SCC to the end of the list. 
+ unsigned FirstCallInSCC = CallSites.size(); + for (unsigned i = 0; i < FirstCallInSCC; ++i) + if (Function *F = CallSites[i].first.getCalledFunction()) + if (SCCFunctions.count(F)) + std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); + + + InlinedArrayAllocasTy InlinedArrayAllocas; + InlineFunctionInfo InlineInfo(&CG, TD); + + // Now that we have all of the call sites, loop over them and inline them if + // it looks profitable to do so. + bool Changed = false; + bool LocalChange; + do { + LocalChange = false; + // Iterate over the outer loop because inlining functions can cause indirect + // calls to become direct calls. + for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { + CallSite CS = CallSites[CSi].first; + + Function *Caller = CS.getCaller(); + Function *Callee = CS.getCalledFunction(); + + // If this call site is dead and it is to a readonly function, we should + // just delete the call instead of trying to inline it, regardless of + // size. This happens because IPSCCP propagates the result out of the + // call and then we're left with the dead call. + if (isInstructionTriviallyDead(CS.getInstruction())) { + DEBUG(dbgs() << " -> Deleting dead call: " + << *CS.getInstruction() << "\n"); + // Update the call graph by deleting the edge from Callee to Caller. + CG[Caller]->removeCallEdgeFor(CS); + CS.getInstruction()->eraseFromParent(); + ++NumCallsDeleted; + // Update the cached cost info with the missing call + growCachedCostInfo(Caller, NULL); + } else { + // We can only inline direct calls to non-declarations. + if (Callee == 0 || Callee->isDeclaration()) continue; + + // If this call sites was obtained by inlining another function, verify + // that the include path for the function did not include the callee + // itself. If so, we'd be recursively inlinling the same function, + // which would provide the same callsites, which would cause us to + // infinitely inline. + int InlineHistoryID = CallSites[CSi].second; + if (InlineHistoryID != -1 && + InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) + continue; + + + // If the policy determines that we should inline this function, + // try to do so. + if (!shouldInline(CS)) + continue; + + // Attempt to inline the function. + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas)) + continue; + ++NumInlined; + + // If inlining this function gave us any new call sites, throw them + // onto our worklist to process. They are useful inline candidates. + if (!InlineInfo.InlinedCalls.empty()) { + // Create a new inline history entry for this, so that we remember + // that these new callsites came about due to inlining Callee. + int NewHistoryID = InlineHistory.size(); + InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); + + for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); + i != e; ++i) { + Value *Ptr = InlineInfo.InlinedCalls[i]; + CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); + } + } + + // Update the cached cost info with the inlined call. + growCachedCostInfo(Caller, Callee); + } + + // If we inlined or deleted the last possible call site to the function, + // delete the function body now. + if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && + // TODO: Can remove if in SCC now. + !SCCFunctions.count(Callee) && + + // The function may be apparently dead, but if there are indirect + // callgraph references to the node, we cannot delete it yet, this + // could invalidate the CGSCC iterator. 
+ CG[Callee]->getNumReferences() == 0) { + DEBUG(dbgs() << " -> Deleting dead function: " + << Callee->getName() << "\n"); + CallGraphNode *CalleeNode = CG[Callee]; + + // Remove any call graph edges from the callee to its callees. + CalleeNode->removeAllCalledFunctions(); + + resetCachedCostInfo(Callee); + + // Removing the node for callee from the call graph and delete it. + delete CG.removeFunctionFromModule(CalleeNode); + ++NumDeleted; + } + + // Remove this call site from the list. If possible, use + // swap/pop_back for efficiency, but do not use it if doing so would + // move a call site to a function in this SCC before the + // 'FirstCallInSCC' barrier. + if (SCC.isSingular()) { + std::swap(CallSites[CSi], CallSites.back()); + CallSites.pop_back(); + } else { + CallSites.erase(CallSites.begin()+CSi); + } + --CSi; + + Changed = true; + LocalChange = true; + } + } while (LocalChange); + + return Changed; +} + +// doFinalization - Remove now-dead linkonce functions at the end of +// processing to avoid breaking the SCC traversal. +bool Inliner::doFinalization(CallGraph &CG) { + return removeDeadFunctions(CG); +} + +/// removeDeadFunctions - Remove dead functions that are not included in +/// DNR (Do Not Remove) list. +bool Inliner::removeDeadFunctions(CallGraph &CG, + SmallPtrSet<const Function *, 16> *DNR) { + SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove; + + // Scan for all of the functions, looking for ones that should now be removed + // from the program. Insert the dead ones in the FunctionsToRemove set. + for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) { + CallGraphNode *CGN = I->second; + if (CGN->getFunction() == 0) + continue; + + Function *F = CGN->getFunction(); + + // If the only remaining users of the function are dead constants, remove + // them. + F->removeDeadConstantUsers(); + + if (DNR && DNR->count(F)) + continue; + if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && + !F->hasAvailableExternallyLinkage()) + continue; + if (!F->use_empty()) + continue; + + // Remove any call graph edges from the function to its callees. + CGN->removeAllCalledFunctions(); + + // Remove any edges from the external node to the function's call graph + // node. These edges might have been made irrelegant due to + // optimization of the program. + CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); + + // Removing the node for callee from the call graph and delete it. + FunctionsToRemove.insert(CGN); + } + + // Now that we know which functions to delete, do so. We didn't want to do + // this inline, because that would invalidate our CallGraph::iterator + // objects. :( + // + // Note that it doesn't matter that we are iterating over a non-stable set + // here to do this, it doesn't matter which order the functions are deleted + // in. 
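  // Caller-side usage sketch (names hypothetical): a concrete inlining pass
  // that must keep certain functions alive can route them through the DNR
  // set, e.g.
  //
  //   SmallPtrSet<const Function *, 16> Keep;
  //   if (const Function *Main = M.getFunction("main"))
  //     Keep.insert(Main);
  //   removeDeadFunctions(CG, &Keep);
  //
  // Everything else that is dead gets erased by the loop below.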
+ bool Changed = false; + for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(), + E = FunctionsToRemove.end(); I != E; ++I) { + resetCachedCostInfo((*I)->getFunction()); + delete CG.removeFunctionFromModule(*I); + ++NumDeleted; + Changed = true; + } + + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp new file mode 100644 index 0000000..47abb7d --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp @@ -0,0 +1,190 @@ +//===-- Internalize.cpp - Mark functions internal -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass loops over all of the functions in the input module, looking for a +// main function. If a main function is found, all other functions and all +// global variables with initializers are marked as internal. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "internalize" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include <fstream> +#include <set> +using namespace llvm; + +STATISTIC(NumAliases , "Number of aliases internalized"); +STATISTIC(NumFunctions, "Number of functions internalized"); +STATISTIC(NumGlobals , "Number of global vars internalized"); + +// APIFile - A file which contains a list of symbols that should not be marked +// external. +static cl::opt<std::string> +APIFile("internalize-public-api-file", cl::value_desc("filename"), + cl::desc("A file containing list of symbol names to preserve")); + +// APIList - A list of symbols that should not be marked internal. +static cl::list<std::string> +APIList("internalize-public-api-list", cl::value_desc("list"), + cl::desc("A list of symbol names to preserve"), + cl::CommaSeparated); + +namespace { + class InternalizePass : public ModulePass { + std::set<std::string> ExternalNames; + /// If no api symbols were specified and a main function is defined, + /// assume the main function is the only API + bool AllButMain; + public: + static char ID; // Pass identification, replacement for typeid + explicit InternalizePass(bool AllButMain = true); + explicit InternalizePass(const std::vector <const char *>& exportList); + void LoadFile(const char *Filename); + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<CallGraph>(); + } + }; +} // end anonymous namespace + +char InternalizePass::ID = 0; +static RegisterPass<InternalizePass> +X("internalize", "Internalize Global Symbols"); + +InternalizePass::InternalizePass(bool AllButMain) + : ModulePass(&ID), AllButMain(AllButMain){ + if (!APIFile.empty()) // If a filename is specified, use it. + LoadFile(APIFile.c_str()); + if (!APIList.empty()) // If a list is specified, use it as well. 
+ ExternalNames.insert(APIList.begin(), APIList.end()); +} + +InternalizePass::InternalizePass(const std::vector<const char *>&exportList) + : ModulePass(&ID), AllButMain(false){ + for(std::vector<const char *>::const_iterator itr = exportList.begin(); + itr != exportList.end(); itr++) { + ExternalNames.insert(*itr); + } +} + +void InternalizePass::LoadFile(const char *Filename) { + // Load the APIFile... + std::ifstream In(Filename); + if (!In.good()) { + errs() << "WARNING: Internalize couldn't load file '" << Filename + << "'! Continuing as if it's empty.\n"; + return; // Just continue as if the file were empty + } + while (In) { + std::string Symbol; + In >> Symbol; + if (!Symbol.empty()) + ExternalNames.insert(Symbol); + } +} + +bool InternalizePass::runOnModule(Module &M) { + CallGraph *CG = getAnalysisIfAvailable<CallGraph>(); + CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; + + if (ExternalNames.empty()) { + // Return if we're not in 'all but main' mode and have no external api + if (!AllButMain) + return false; + // If no list or file of symbols was specified, check to see if there is a + // "main" symbol defined in the module. If so, use it, otherwise do not + // internalize the module, it must be a library or something. + // + Function *MainFunc = M.getFunction("main"); + if (MainFunc == 0 || MainFunc->isDeclaration()) + return false; // No main found, must be a library... + + // Preserve main, internalize all else. + ExternalNames.insert(MainFunc->getName()); + } + + bool Changed = false; + + // Mark all functions not in the api as internal. + // FIXME: maybe use private linkage? + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && // Function must be defined here + !I->hasLocalLinkage() && // Can't already have internal linkage + !ExternalNames.count(I->getName())) {// Not marked to keep external? + I->setLinkage(GlobalValue::InternalLinkage); + // Remove a callgraph edge from the external node to this function. + if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); + Changed = true; + ++NumFunctions; + DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); + } + + // Never internalize the llvm.used symbol. It is used to implement + // attribute((used)). + // FIXME: Shouldn't this just filter on llvm.metadata section?? + ExternalNames.insert("llvm.used"); + ExternalNames.insert("llvm.compiler.used"); + + // Never internalize anchors used by the machine module info, else the info + // won't find them. (see MachineModuleInfo.) + ExternalNames.insert("llvm.dbg.compile_units"); + ExternalNames.insert("llvm.dbg.global_variables"); + ExternalNames.insert("llvm.dbg.subprograms"); + ExternalNames.insert("llvm.global_ctors"); + ExternalNames.insert("llvm.global_dtors"); + ExternalNames.insert("llvm.noinline"); + ExternalNames.insert("llvm.global.annotations"); + + // Mark all global variables with initializers that are not in the api as + // internal as well. + // FIXME: maybe use private linkage? + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (!I->isDeclaration() && !I->hasLocalLinkage() && + // Available externally is really just a "declaration with a body". + !I->hasAvailableExternallyLinkage() && + !ExternalNames.count(I->getName())) { + I->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + ++NumGlobals; + DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); + } + + // Mark all aliases that are not in the api as internal as well. 
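  // Usage sketch (symbol names illustrative): with the options declared
  // above, a command line such as
  //
  //   opt -internalize -internalize-public-api-list=main,exported_entry \
  //       in.bc -o out.bc
  //
  // keeps 'main' and 'exported_entry' external, while the loop just below
  // and the ones above give everything else internal linkage.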
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E; ++I) + if (!I->isDeclaration() && !I->hasInternalLinkage() && + // Available externally is really just a "declaration with a body". + !I->hasAvailableExternallyLinkage() && + !ExternalNames.count(I->getName())) { + I->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + ++NumAliases; + DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); + } + + return Changed; +} + +ModulePass *llvm::createInternalizePass(bool AllButMain) { + return new InternalizePass(AllButMain); +} + +ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) { + return new InternalizePass(el); +} diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp new file mode 100644 index 0000000..cb81330 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -0,0 +1,242 @@ +//===- LoopExtractor.cpp - Extract each loop into a new function ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A pass wrapper around the ExtractLoop() scalar transformation to extract each +// top-level loop into its own new function. If the loop is the ONLY loop in a +// given function, it is not touched. This is a pass most useful for debugging +// via bugpoint. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-extract" +#include "llvm/Transforms/IPO.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/ADT/Statistic.h" +#include <fstream> +#include <set> +using namespace llvm; + +STATISTIC(NumExtracted, "Number of loops extracted"); + +namespace { + struct LoopExtractor : public LoopPass { + static char ID; // Pass identification, replacement for typeid + unsigned NumLoops; + + explicit LoopExtractor(unsigned numLoops = ~0) + : LoopPass(&ID), NumLoops(numLoops) {} + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(BreakCriticalEdgesID); + AU.addRequiredID(LoopSimplifyID); + AU.addRequired<DominatorTree>(); + } + }; +} + +char LoopExtractor::ID = 0; +static RegisterPass<LoopExtractor> +X("loop-extract", "Extract loops into new functions"); + +namespace { + /// SingleLoopExtractor - For bugpoint. + struct SingleLoopExtractor : public LoopExtractor { + static char ID; // Pass identification, replacement for typeid + SingleLoopExtractor() : LoopExtractor(1) {} + }; +} // End anonymous namespace + +char SingleLoopExtractor::ID = 0; +static RegisterPass<SingleLoopExtractor> +Y("loop-extract-single", "Extract at most one loop into a new function"); + +// createLoopExtractorPass - This pass extracts all natural loops from the +// program into a function if it can. +// +Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } + +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { + // Only visit top-level loops. + if (L->getParentLoop()) + return false; + + // If LoopSimplify form is not available, stay out of trouble. 
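  // LoopSimplify form guarantees a preheader, a single backedge and
  // dedicated exit blocks, which is the shape the extraction code below
  // expects; bail out rather than extract a loop left in a non-canonical
  // form.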
+ if (!L->isLoopSimplifyForm()) + return false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + bool Changed = false; + + // If there is more than one top-level loop in this function, extract all of + // the loops. Otherwise there is exactly one top-level loop; in this case if + // this function is more than a minimal wrapper around the loop, extract + // the loop. + bool ShouldExtractLoop = false; + + // Extract the loop if the entry block doesn't branch to the loop header. + TerminatorInst *EntryTI = + L->getHeader()->getParent()->getEntryBlock().getTerminator(); + if (!isa<BranchInst>(EntryTI) || + !cast<BranchInst>(EntryTI)->isUnconditional() || + EntryTI->getSuccessor(0) != L->getHeader()) + ShouldExtractLoop = true; + else { + // Check to see if any exits from the loop are more than just return + // blocks. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) { + ShouldExtractLoop = true; + break; + } + } + if (ShouldExtractLoop) { + if (NumLoops == 0) return Changed; + --NumLoops; + if (ExtractLoop(DT, L) != 0) { + Changed = true; + // After extraction, the loop is replaced by a function call, so + // we shouldn't try to run any more loop passes on it. + LPM.deleteLoopFromQueue(L); + } + ++NumExtracted; + } + + return Changed; +} + +// createSingleLoopExtractorPass - This pass extracts one natural loop from the +// program into a function if it can. This is used by bugpoint. +// +Pass *llvm::createSingleLoopExtractorPass() { + return new SingleLoopExtractor(); +} + + +// BlockFile - A file which contains a list of blocks that should not be +// extracted. +static cl::opt<std::string> +BlockFile("extract-blocks-file", cl::value_desc("filename"), + cl::desc("A file containing list of basic blocks to not extract"), + cl::Hidden); + +namespace { + /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks + /// from the module into their own functions except for those specified by the + /// BlocksToNotExtract list. + class BlockExtractorPass : public ModulePass { + void LoadFile(const char *Filename); + + std::vector<BasicBlock*> BlocksToNotExtract; + std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName; + public: + static char ID; // Pass identification, replacement for typeid + explicit BlockExtractorPass(const std::vector<BasicBlock*> &B) + : ModulePass(&ID), BlocksToNotExtract(B) { + if (!BlockFile.empty()) + LoadFile(BlockFile.c_str()); + } + BlockExtractorPass() : ModulePass(&ID) {} + + bool runOnModule(Module &M); + }; +} + +char BlockExtractorPass::ID = 0; +static RegisterPass<BlockExtractorPass> +XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)"); + +// createBlockExtractorPass - This pass extracts all blocks (except those +// specified in the argument list) from the functions in the module. +// +ModulePass *llvm::createBlockExtractorPass(const std::vector<BasicBlock*> &BTNE) +{ + return new BlockExtractorPass(BTNE); +} + +void BlockExtractorPass::LoadFile(const char *Filename) { + // Load the BlockFile... 
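  // The file parsed below is a whitespace-separated sequence of
  // "<function name> <block name>" pairs, for example (names made up):
  //
  //   main entry
  //   compute for.body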
+ std::ifstream In(Filename); + if (!In.good()) { + errs() << "WARNING: BlockExtractor couldn't load file '" << Filename + << "'!\n"; + return; + } + while (In) { + std::string FunctionName, BlockName; + In >> FunctionName; + In >> BlockName; + if (!BlockName.empty()) + BlocksToNotExtractByName.push_back( + std::make_pair(FunctionName, BlockName)); + } +} + +bool BlockExtractorPass::runOnModule(Module &M) { + std::set<BasicBlock*> TranslatedBlocksToNotExtract; + for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) { + BasicBlock *BB = BlocksToNotExtract[i]; + Function *F = BB->getParent(); + + // Map the corresponding function in this module. + Function *MF = M.getFunction(F->getName()); + assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?"); + + // Figure out which index the basic block is in its function. + Function::iterator BBI = MF->begin(); + std::advance(BBI, std::distance(F->begin(), Function::iterator(BB))); + TranslatedBlocksToNotExtract.insert(BBI); + } + + while (!BlocksToNotExtractByName.empty()) { + // There's no way to find BBs by name without looking at every BB inside + // every Function. Fortunately, this is always empty except when used by + // bugpoint in which case correctness is more important than performance. + + std::string &FuncName = BlocksToNotExtractByName.back().first; + std::string &BlockName = BlocksToNotExtractByName.back().second; + + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { + Function &F = *FI; + if (F.getName() != FuncName) continue; + + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + BasicBlock &BB = *BI; + if (BB.getName() != BlockName) continue; + + TranslatedBlocksToNotExtract.insert(BI); + } + } + + BlocksToNotExtractByName.pop_back(); + } + + // Now that we know which blocks to not extract, figure out which ones we WANT + // to extract. + std::vector<BasicBlock*> BlocksToExtract; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (!TranslatedBlocksToNotExtract.count(BB)) + BlocksToExtract.push_back(BB); + + for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) + ExtractBasicBlock(BlocksToExtract[i]); + + return !BlocksToExtract.empty(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp new file mode 100644 index 0000000..4d61e83 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp @@ -0,0 +1,541 @@ +//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lowering of setjmp and longjmp to use the +// LLVM invoke and unwind instructions as necessary. +// +// Lowering of longjmp is fairly trivial. We replace the call with a +// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()". +// This unwinds the stack for us calling all of the destructors for +// objects allocated on the stack. +// +// At a setjmp call, the basic block is split and the setjmp removed. +// The calls in a function that have a setjmp are converted to invoke +// where the except part checks to see if it's a longjmp exception and, +// if so, if it's handled in the function. 
If it is, then it gets the +// value returned by the longjmp and goes to where the basic block was +// split. Invoke instructions are handled in a similar fashion with the +// original except block being executed if it isn't a longjmp except +// that is handled by that function. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FIXME: This pass doesn't deal with PHI statements just yet. That is, +// we expect this to occur before SSAification is done. This would seem +// to make sense, but in general, it might be a good idea to make this +// pass invokable via the "opt" command at will. +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowersetjmp" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include <map> +using namespace llvm; + +STATISTIC(LongJmpsTransformed, "Number of longjmps transformed"); +STATISTIC(SetJmpsTransformed , "Number of setjmps transformed"); +STATISTIC(CallsTransformed , "Number of calls invokified"); +STATISTIC(InvokesTransformed , "Number of invokes modified"); + +namespace { + //===--------------------------------------------------------------------===// + // LowerSetJmp pass implementation. + class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> { + // LLVM library functions... + Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap + Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap + Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map + Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp + Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception + Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception + Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value + + typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair; + + // Keep track of those basic blocks reachable via a depth-first search of + // the CFG from a setjmp call. We only need to transform those "call" and + // "invoke" instructions that are reachable from the setjmp call site. + std::set<BasicBlock*> DFSBlocks; + + // The setjmp map is going to hold information about which setjmps + // were called (each setjmp gets its own number) and with which + // buffer it was called. + std::map<Function*, AllocaInst*> SJMap; + + // The rethrow basic block map holds the basic block to branch to if + // the exception isn't handled in the current function and needs to + // be rethrown. + std::map<const Function*, BasicBlock*> RethrowBBMap; + + // The preliminary basic block map holds a basic block that grabs the + // exception and determines if it's handled by the current function. + std::map<const Function*, BasicBlock*> PrelimBBMap; + + // The switch/value map holds a switch inst/call inst pair. The + // switch inst controls which handler (if any) gets called and the + // value is the value returned to that handler by the call to + // __llvm_sjljeh_get_longjmp_value. + std::map<const Function*, SwitchValuePair> SwitchValMap; + + // A map of which setjmps we've seen so far in a function. 
+ std::map<const Function*, unsigned> SetJmpIDMap; + + AllocaInst* GetSetJmpMap(Function* Func); + BasicBlock* GetRethrowBB(Function* Func); + SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow); + + void TransformLongJmpCall(CallInst* Inst); + void TransformSetJmpCall(CallInst* Inst); + + bool IsTransformableFunction(StringRef Name); + public: + static char ID; // Pass identification, replacement for typeid + LowerSetJmp() : ModulePass(&ID) {} + + void visitCallInst(CallInst& CI); + void visitInvokeInst(InvokeInst& II); + void visitReturnInst(ReturnInst& RI); + void visitUnwindInst(UnwindInst& UI); + + bool runOnModule(Module& M); + bool doInitialization(Module& M); + }; +} // end anonymous namespace + +char LowerSetJmp::ID = 0; +static RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump"); + +// run - Run the transformation on the program. We grab the function +// prototypes for longjmp and setjmp. If they are used in the program, +// then we can go directly to the places they're at and transform them. +bool LowerSetJmp::runOnModule(Module& M) { + bool Changed = false; + + // These are what the functions are called. + Function* SetJmp = M.getFunction("llvm.setjmp"); + Function* LongJmp = M.getFunction("llvm.longjmp"); + + // This program doesn't have longjmp and setjmp calls. + if ((!LongJmp || LongJmp->use_empty()) && + (!SetJmp || SetJmp->use_empty())) return false; + + // Initialize some values and functions we'll need to transform the + // setjmp/longjmp functions. + doInitialization(M); + + if (SetJmp) { + for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end(); + B != E; ++B) { + BasicBlock* BB = cast<Instruction>(*B)->getParent(); + for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks), + E = df_ext_end(BB, DFSBlocks); I != E; ++I) + /* empty */; + } + + while (!SetJmp->use_empty()) { + assert(isa<CallInst>(SetJmp->use_back()) && + "User of setjmp intrinsic not a call?"); + TransformSetJmpCall(cast<CallInst>(SetJmp->use_back())); + Changed = true; + } + } + + if (LongJmp) + while (!LongJmp->use_empty()) { + assert(isa<CallInst>(LongJmp->use_back()) && + "User of longjmp intrinsic not a call?"); + TransformLongJmpCall(cast<CallInst>(LongJmp->use_back())); + Changed = true; + } + + // Now go through the affected functions and convert calls and invokes + // to new invokes... + for (std::map<Function*, AllocaInst*>::iterator + B = SJMap.begin(), E = SJMap.end(); B != E; ++B) { + Function* F = B->first; + for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB) + for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) { + visit(*IB++); + if (IB != BB->end() && IB->getParent() != BB) + break; // The next instruction got moved to a different block! + } + } + + DFSBlocks.clear(); + SJMap.clear(); + RethrowBBMap.clear(); + PrelimBBMap.clear(); + SwitchValMap.clear(); + SetJmpIDMap.clear(); + + return Changed; +} + +// doInitialization - For the lower long/setjmp pass, this ensures that a +// module contains a declaration for the intrisic functions we are going +// to call to convert longjmp and setjmp calls. +// +// This function is always successful, unless it isn't. +bool LowerSetJmp::doInitialization(Module& M) +{ + const Type *SBPTy = Type::getInt8PtrTy(M.getContext()); + const Type *SBPPTy = PointerType::getUnqual(SBPTy); + + // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for + // a description of the following library functions. 
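  // At the source level the lowering corresponds to rewriting code such as
  // the following (illustrative C, not from this file):
  //
  //   if (setjmp(buf) == 0) { ...; longjmp(buf, 1); }
  //
  // so that each setjmp site becomes a numbered entry in a per-function map
  // and each longjmp becomes a call to __llvm_sjljeh_throw_longjmp, with the
  // helpers declared below doing the run-time bookkeeping.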
+ + // void __llvm_sjljeh_init_setjmpmap(void**) + InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap", + Type::getVoidTy(M.getContext()), + SBPPTy, (Type *)0); + // void __llvm_sjljeh_destroy_setjmpmap(void**) + DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap", + Type::getVoidTy(M.getContext()), + SBPPTy, (Type *)0); + + // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned) + AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map", + Type::getVoidTy(M.getContext()), + SBPPTy, SBPTy, + Type::getInt32Ty(M.getContext()), + (Type *)0); + + // void __llvm_sjljeh_throw_longjmp(int*, int) + ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp", + Type::getVoidTy(M.getContext()), SBPTy, + Type::getInt32Ty(M.getContext()), + (Type *)0); + + // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **) + TryCatchLJ = + M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception", + Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0); + + // bool __llvm_sjljeh_is_longjmp_exception() + IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception", + Type::getInt1Ty(M.getContext()), + (Type *)0); + + // int __llvm_sjljeh_get_longjmp_value() + GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value", + Type::getInt32Ty(M.getContext()), + (Type *)0); + return true; +} + +// IsTransformableFunction - Return true if the function name isn't one +// of the ones we don't want transformed. Currently, don't transform any +// "llvm.{setjmp,longjmp}" functions and none of the setjmp/longjmp error +// handling functions (beginning with __llvm_sjljeh_...they don't throw +// exceptions). +bool LowerSetJmp::IsTransformableFunction(StringRef Name) { + return !Name.startswith("__llvm_sjljeh_"); +} + +// TransformLongJmpCall - Transform a longjmp call into a call to the +// internal __llvm_sjljeh_throw_longjmp function. It then takes care of +// throwing the exception for us. +void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) +{ + const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext()); + + // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the + // same parameters as "longjmp", except that the buffer is cast to a + // char*. It returns "void", so it doesn't need to replace any of + // Inst's uses and doesn't get a name. + CastInst* CI = + new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst); + Value *Args[] = { CI, Inst->getOperand(2) }; + CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst); + + SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; + + // If the function has a setjmp call in it (they are transformed first) + // we should branch to the basic block that determines if this longjmp + // is applicable here. Otherwise, issue an unwind. + if (SVP.first) + BranchInst::Create(SVP.first->getParent(), Inst); + else + new UnwindInst(Inst->getContext(), Inst); + + // Remove all insts after the branch/unwind inst. Go from back to front to + // avoid replaceAllUsesWith if possible. + BasicBlock *BB = Inst->getParent(); + Instruction *Removed; + do { + Removed = &BB->back(); + // If the removed instructions have any users, replace them now. + if (!Removed->use_empty()) + Removed->replaceAllUsesWith(UndefValue::get(Removed->getType())); + Removed->eraseFromParent(); + } while (Removed != Inst); + + ++LongJmpsTransformed; +} + +// GetSetJmpMap - Retrieve (create and initialize, if necessary) the +// setjmp map. 
This map is going to hold information about which setjmps +// were called (each setjmp gets its own number) and with which buffer it +// was called. There can be only one! +AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func) +{ + if (SJMap[Func]) return SJMap[Func]; + + // Insert the setjmp map initialization before the first instruction in + // the function. + Instruction* Inst = Func->getEntryBlock().begin(); + assert(Inst && "Couldn't find even ONE instruction in entry block!"); + + // Fill in the alloca and call to initialize the SJ map. + const Type *SBPTy = + Type::getInt8PtrTy(Func->getContext()); + AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst); + CallInst::Create(InitSJMap, Map, "", Inst); + return SJMap[Func] = Map; +} + +// GetRethrowBB - Only one rethrow basic block is needed per function. +// If this is a longjmp exception but not handled in this block, this BB +// performs the rethrow. +BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func) +{ + if (RethrowBBMap[Func]) return RethrowBBMap[Func]; + + // The basic block we're going to jump to if we need to rethrow the + // exception. + BasicBlock* Rethrow = + BasicBlock::Create(Func->getContext(), "RethrowExcept", Func); + + // Fill in the "Rethrow" BB with a call to rethrow the exception. This + // is the last instruction in the BB since at this point the runtime + // should exit this function and go to the next function. + new UnwindInst(Func->getContext(), Rethrow); + return RethrowBBMap[Func] = Rethrow; +} + +// GetSJSwitch - Return the switch statement that controls which handler +// (if any) gets called and the value returned to that handler. +LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, + BasicBlock* Rethrow) +{ + if (SwitchValMap[Func].first) return SwitchValMap[Func]; + + BasicBlock* LongJmpPre = + BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func); + + // Keep track of the preliminary basic block for some of the other + // transformations. + PrelimBBMap[Func] = LongJmpPre; + + // Grab the exception. + CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre); + + // The "decision basic block" gets the number associated with the + // setjmp call returning to switch on and the value returned by + // longjmp. + BasicBlock* DecisionBB = + BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func); + + BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre); + + // Fill in the "decision" basic block. + CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB); + CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum", + DecisionBB); + + SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB); + return SwitchValMap[Func] = SwitchValuePair(SI, LJVal); +} + +// TransformSetJmpCall - The setjmp call is a bit trickier to transform. +// We're going to convert all setjmp calls to nops. Then all "call" and +// "invoke" instructions in the function are converted to "invoke" where +// the "except" branch is used when returning from a longjmp call. +void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) +{ + BasicBlock* ABlock = Inst->getParent(); + Function* Func = ABlock->getParent(); + + // Add this setjmp to the setjmp map. 
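  // The IR emitted just below looks roughly like this (value names follow
  // the ones used in the code, types abbreviated):
  //
  //   %SBJmpBuf = bitcast <bufty>* %jmpbuf to i8*
  //   call void @__llvm_sjljeh_add_setjmp_to_map(i8** %SJMap, i8* %SBJmpBuf,
  //                                              i32 <id>)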
+ const Type* SBPTy = + Type::getInt8PtrTy(Inst->getContext()); + CastInst* BufPtr = + new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); + Value *Args[] = { + GetSetJmpMap(Func), BufPtr, + ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++) + }; + CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst); + + // We are guaranteed that there are no values live across basic blocks + // (because we are "not in SSA form" yet), but there can still be values live + // in basic blocks. Because of this, splitting the setjmp block can cause + // values above the setjmp to not dominate uses which are after the setjmp + // call. For all of these occasions, we must spill the value to the stack. + // + std::set<Instruction*> InstrsAfterCall; + + // The call is probably very close to the end of the basic block, for the + // common usage pattern of: 'if (setjmp(...))', so keep track of the + // instructions after the call. + for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end(); + I != E; ++I) + InstrsAfterCall.insert(I); + + for (BasicBlock::iterator II = ABlock->begin(); + II != BasicBlock::iterator(Inst); ++II) + // Loop over all of the uses of instruction. If any of them are after the + // call, "spill" the value to the stack. + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != ABlock || + InstrsAfterCall.count(cast<Instruction>(*UI))) { + DemoteRegToStack(*II); + break; + } + InstrsAfterCall.clear(); + + // Change the setjmp call into a branch statement. We'll remove the + // setjmp call in a little bit. No worries. + BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst); + assert(SetJmpContBlock && "Couldn't split setjmp BB!!"); + + SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont"); + + // Add the SetJmpContBlock to the set of blocks reachable from a setjmp. + DFSBlocks.insert(SetJmpContBlock); + + // This PHI node will be in the new block created from the + // splitBasicBlock call. + PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), + "SetJmpReturn", Inst); + + // Coming from a call to setjmp, the return is 0. + PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())), + ABlock); + + // Add the case for this setjmp's number... + SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func)); + SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()), + SetJmpIDMap[Func] - 1), + SetJmpContBlock); + + // Value coming from the handling of the exception. + PHI->addIncoming(SVP.second, SVP.second->getParent()); + + // Replace all uses of this instruction with the PHI node created by + // the eradication of setjmp. + Inst->replaceAllUsesWith(PHI); + Inst->eraseFromParent(); + + ++SetJmpsTransformed; +} + +// visitCallInst - This converts all LLVM call instructions into invoke +// instructions. The except part of the invoke goes to the "LongJmpBlkPre" +// that grabs the exception and proceeds to determine if it's a longjmp +// exception or not. +void LowerSetJmp::visitCallInst(CallInst& CI) +{ + if (CI.getCalledFunction()) + if (!IsTransformableFunction(CI.getCalledFunction()->getName()) || + CI.getCalledFunction()->isIntrinsic()) return; + + BasicBlock* OldBB = CI.getParent(); + + // If not reachable from a setjmp call, don't transform. 
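  // Sketch of the rewrite performed below (value names illustrative):
  //
  //   %r = call i32 @callee(i32 %x)
  //
  // becomes
  //
  //   %r = invoke i32 @callee(i32 %x)
  //            to label %Call2Invoke unwind label %LongJmpBlkPre
  //
  // where %Call2Invoke is the split-off remainder of the block and
  // %LongJmpBlkPre is the preliminary block that checks whether the unwind
  // came from a longjmp.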
+ if (!DFSBlocks.count(OldBB)) return; + + BasicBlock* NewBB = OldBB->splitBasicBlock(CI); + assert(NewBB && "Couldn't split BB of \"call\" instruction!!"); + DFSBlocks.insert(NewBB); + NewBB->setName("Call2Invoke"); + + Function* Func = OldBB->getParent(); + + // Construct the new "invoke" instruction. + TerminatorInst* Term = OldBB->getTerminator(); + std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end()); + InvokeInst* II = + InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], + Params.begin(), Params.end(), CI.getName(), Term); + II->setCallingConv(CI.getCallingConv()); + II->setAttributes(CI.getAttributes()); + + // Replace the old call inst with the invoke inst and remove the call. + CI.replaceAllUsesWith(II); + CI.eraseFromParent(); + + // The old terminator is useless now that we have the invoke inst. + Term->eraseFromParent(); + ++CallsTransformed; +} + +// visitInvokeInst - Converting the "invoke" instruction is fairly +// straight-forward. The old exception part is replaced by a query asking +// if this is a longjmp exception. If it is, then it goes to the longjmp +// exception blocks. Otherwise, control is passed the old exception. +void LowerSetJmp::visitInvokeInst(InvokeInst& II) +{ + if (II.getCalledFunction()) + if (!IsTransformableFunction(II.getCalledFunction()->getName()) || + II.getCalledFunction()->isIntrinsic()) return; + + BasicBlock* BB = II.getParent(); + + // If not reachable from a setjmp call, don't transform. + if (!DFSBlocks.count(BB)) return; + + BasicBlock* ExceptBB = II.getUnwindDest(); + + Function* Func = BB->getParent(); + BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(), + "InvokeExcept", Func); + + // If this is a longjmp exception, then branch to the preliminary BB of + // the longjmp exception handling. Otherwise, go to the old exception. + CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept", + NewExceptBB); + + BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB); + + II.setUnwindDest(NewExceptBB); + ++InvokesTransformed; +} + +// visitReturnInst - We want to destroy the setjmp map upon exit from the +// function. +void LowerSetJmp::visitReturnInst(ReturnInst &RI) { + Function* Func = RI.getParent()->getParent(); + CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI); +} + +// visitUnwindInst - We want to destroy the setjmp map upon exit from the +// function. +void LowerSetJmp::visitUnwindInst(UnwindInst &UI) { + Function* Func = UI.getParent()->getParent(); + CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI); +} + +ModulePass *llvm::createLowerSetJmpPass() { + return new LowerSetJmp(); +} + diff --git a/contrib/llvm/lib/Transforms/IPO/Makefile b/contrib/llvm/lib/Transforms/IPO/Makefile new file mode 100644 index 0000000..5c42374 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/IPO/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMipo +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp new file mode 100644 index 0000000..622a9b5 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -0,0 +1,748 @@ +//===- MergeFunctions.cpp - Merge identical functions ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for equivalent functions that are mergable and folds them. +// +// A hash is computed from the function, based on its type and number of +// basic blocks. +// +// Once all hashes are computed, we perform an expensive equality comparison +// on each function pair. This takes n^2/2 comparisons per bucket, so it's +// important that the hash function be high quality. The equality comparison +// iterates through each instruction in each basic block. +// +// When a match is found the functions are folded. If both functions are +// overridable, we move the functionality into a new internal function and +// leave two overridable thunks to it. +// +//===----------------------------------------------------------------------===// +// +// Future work: +// +// * virtual functions. +// +// Many functions have their address taken by the virtual function table for +// the object they belong to. However, as long as it's only used for a lookup +// and call, this is irrelevant, and we'd like to fold such implementations. +// +// * use SCC to cut down on pair-wise comparisons and solve larger cycles. +// +// The current implementation loops over a pair-wise comparison of all +// functions in the program where the two functions in the pair are treated as +// assumed to be equal until proven otherwise. We could both use fewer +// comparisons and optimize more complex cases if we used strongly connected +// components of the call graph. +// +// * be smarter about bitcast. +// +// In order to fold functions, we will sometimes add either bitcast instructions +// or bitcast constant expressions. Unfortunately, this can confound further +// analysis since the two functions differ where one has a bitcast and the +// other doesn't. We should learn to peer through bitcasts without imposing bad +// performance properties. +// +// * don't emit aliases for Mach-O. +// +// Mach-O doesn't support aliases which means that we must avoid introducing +// them in the bitcode on architectures which don't support them, such as +// Mac OSX. There's a few approaches to this problem; +// a) teach codegen to lower global aliases to thunks on platforms which don't +// support them. +// b) always emit thunks, and create a separate thunk-to-alias pass which +// runs on ELF systems. This has the added benefit of transforming other +// thunks such as those produced by a C++ frontend into aliases when legal +// to do so. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mergefunc" +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Constants.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include <map> +#include <vector> +using namespace llvm; + +STATISTIC(NumFunctionsMerged, "Number of functions merged"); + +namespace { + class MergeFunctions : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + MergeFunctions() : ModulePass(&ID) {} + + bool runOnModule(Module &M); + + private: + bool isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2); + + bool equals(const BasicBlock *BB1, const BasicBlock *BB2); + bool equals(const Function *F, const Function *G); + + bool compare(const Value *V1, const Value *V2); + + const Function *LHS, *RHS; + typedef DenseMap<const Value *, unsigned long> IDMap; + IDMap Map; + DenseMap<const Function *, IDMap> Domains; + DenseMap<const Function *, unsigned long> DomainCount; + TargetData *TD; + }; +} + +char MergeFunctions::ID = 0; +static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions"); + +ModulePass *llvm::createMergeFunctionsPass() { + return new MergeFunctions(); +} + +// ===----------------------------------------------------------------------=== +// Comparison of functions +// ===----------------------------------------------------------------------=== + +static unsigned long hash(const Function *F) { + const FunctionType *FTy = F->getFunctionType(); + + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + ID.AddBoolean(F->hasGC()); + ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(FTy->getReturnType()->getTypeID()); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(FTy->getParamType(i)->getTypeID()); + return ID.ComputeHash(); +} + +/// isEquivalentType - any two pointers are equivalent. Otherwise, standard +/// type equivalence rules apply. +static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { + if (Ty1 == Ty2) + return true; + if (Ty1->getTypeID() != Ty2->getTypeID()) + return false; + + switch(Ty1->getTypeID()) { + default: + llvm_unreachable("Unknown type!"); + // Fall through in Release-Asserts mode. + case Type::IntegerTyID: + case Type::OpaqueTyID: + // Ty1 == Ty2 would have returned true earlier. 
+ return false; + + case Type::VoidTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + case Type::MetadataTyID: + return true; + + case Type::PointerTyID: { + const PointerType *PTy1 = cast<PointerType>(Ty1); + const PointerType *PTy2 = cast<PointerType>(Ty2); + return PTy1->getAddressSpace() == PTy2->getAddressSpace(); + } + + case Type::StructTyID: { + const StructType *STy1 = cast<StructType>(Ty1); + const StructType *STy2 = cast<StructType>(Ty2); + if (STy1->getNumElements() != STy2->getNumElements()) + return false; + + if (STy1->isPacked() != STy2->isPacked()) + return false; + + for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i))) + return false; + } + return true; + } + + case Type::UnionTyID: { + const UnionType *UTy1 = cast<UnionType>(Ty1); + const UnionType *UTy2 = cast<UnionType>(Ty2); + + // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc. + if (UTy1->getNumElements() != UTy2->getNumElements()) + return false; + + for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i))) + return false; + } + return true; + } + + case Type::FunctionTyID: { + const FunctionType *FTy1 = cast<FunctionType>(Ty1); + const FunctionType *FTy2 = cast<FunctionType>(Ty2); + if (FTy1->getNumParams() != FTy2->getNumParams() || + FTy1->isVarArg() != FTy2->isVarArg()) + return false; + + if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType())) + return false; + + for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) { + if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i))) + return false; + } + return true; + } + + case Type::ArrayTyID: + case Type::VectorTyID: { + const SequentialType *STy1 = cast<SequentialType>(Ty1); + const SequentialType *STy2 = cast<SequentialType>(Ty2); + return isEquivalentType(STy1->getElementType(), STy2->getElementType()); + } + } +} + +/// isEquivalentOperation - determine whether the two operations are the same +/// except that pointer-to-A and pointer-to-B are equivalent. This should be +/// kept in sync with Instruction::isSameOperationAs. +static bool +isEquivalentOperation(const Instruction *I1, const Instruction *I2) { + if (I1->getOpcode() != I2->getOpcode() || + I1->getNumOperands() != I2->getNumOperands() || + !isEquivalentType(I1->getType(), I2->getType()) || + !I1->hasSameSubclassOptionalData(I2)) + return false; + + // We have two instructions of identical opcode and #operands. Check to see + // if all operands are the same type + for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i) + if (!isEquivalentType(I1->getOperand(i)->getType(), + I2->getOperand(i)->getType())) + return false; + + // Check special state that is a part of some instructions. 
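  // For example (IR illustrative): "load i32* %p, align 4" and
  // "volatile load i32* %p, align 8" agree in opcode and operand types but
  // differ in the per-instruction state compared below, so they are not
  // considered equivalent.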
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I1)) + return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() && + LI->getAlignment() == cast<LoadInst>(I2)->getAlignment(); + if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) + return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && + SI->getAlignment() == cast<StoreInst>(I2)->getAlignment(); + if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) + return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); + if (const CallInst *CI = dyn_cast<CallInst>(I1)) + return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() && + CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && + CI->getAttributes().getRawPointer() == + cast<CallInst>(I2)->getAttributes().getRawPointer(); + if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) + return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && + CI->getAttributes().getRawPointer() == + cast<InvokeInst>(I2)->getAttributes().getRawPointer(); + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { + if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) + return false; + for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i) + if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i]) + return false; + return true; + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) { + if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices()) + return false; + for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i) + if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i]) + return false; + return true; + } + + return true; +} + +bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2) { + if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) { + SmallVector<Value *, 8> Indices1, Indices2; + for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(), + E = GEP1->idx_end(); I != E; ++I) { + Indices1.push_back(*I); + } + for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(), + E = GEP2->idx_end(); I != E; ++I) { + Indices2.push_back(*I); + } + uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), + Indices1.data(), Indices1.size()); + uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), + Indices2.data(), Indices2.size()); + return Offset1 == Offset2; + } + + // Equivalent types aren't enough. + if (GEP1->getPointerOperand()->getType() != + GEP2->getPointerOperand()->getType()) + return false; + + if (GEP1->getNumOperands() != GEP2->getNumOperands()) + return false; + + for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { + if (!compare(GEP1->getOperand(i), GEP2->getOperand(i))) + return false; + } + + return true; +} + +bool MergeFunctions::compare(const Value *V1, const Value *V2) { + if (V1 == LHS || V1 == RHS) + if (V2 == LHS || V2 == RHS) + return true; + + // TODO: constant expressions in terms of LHS and RHS + if (isa<Constant>(V1)) + return V1 == V2; + + if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) { + const InlineAsm *IA1 = cast<InlineAsm>(V1); + const InlineAsm *IA2 = cast<InlineAsm>(V2); + return IA1->getAsmString() == IA2->getAsmString() && + IA1->getConstraintString() == IA2->getConstraintString(); + } + + // We enumerate constants globally and arguments, basic blocks or + // instructions within the function they belong to. 
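  // Small worked example of the numbering scheme below: when comparing
  //
  //   %s = add i32 %a, %b   (in LHS)
  //   %t = add i32 %c, %d   (in RHS)
  //
  // suppose %a and %c are each the first value enumerated in their own
  // function's domain; both then receive ID 1 and compare equal, and %b/%d
  // likewise receive ID 2.  Two values compare unequal only if they were
  // first encountered at different points in the two functions.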
+ const Function *Domain1 = NULL; + if (const Argument *A = dyn_cast<Argument>(V1)) { + Domain1 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) { + Domain1 = BB->getParent(); + } else if (const Instruction *I = dyn_cast<Instruction>(V1)) { + Domain1 = I->getParent()->getParent(); + } + + const Function *Domain2 = NULL; + if (const Argument *A = dyn_cast<Argument>(V2)) { + Domain2 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) { + Domain2 = BB->getParent(); + } else if (const Instruction *I = dyn_cast<Instruction>(V2)) { + Domain2 = I->getParent()->getParent(); + } + + if (Domain1 != Domain2) + if (Domain1 != LHS && Domain1 != RHS) + if (Domain2 != LHS && Domain2 != RHS) + return false; + + IDMap &Map1 = Domains[Domain1]; + unsigned long &ID1 = Map1[V1]; + if (!ID1) + ID1 = ++DomainCount[Domain1]; + + IDMap &Map2 = Domains[Domain2]; + unsigned long &ID2 = Map2[V2]; + if (!ID2) + ID2 = ++DomainCount[Domain2]; + + return ID1 == ID2; +} + +bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) { + BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); + BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); + + do { + if (!compare(FI, GI)) + return false; + + if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) { + const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI); + const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI); + + if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand())) + return false; + + if (!isEquivalentGEP(GEP1, GEP2)) + return false; + } else { + if (!isEquivalentOperation(FI, GI)) + return false; + + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + Value *OpF = FI->getOperand(i); + Value *OpG = GI->getOperand(i); + + if (!compare(OpF, OpG)) + return false; + + if (OpF->getValueID() != OpG->getValueID() || + !isEquivalentType(OpF->getType(), OpG->getType())) + return false; + } + } + + ++FI, ++GI; + } while (FI != FE && GI != GE); + + return FI == FE && GI == GE; +} + +bool MergeFunctions::equals(const Function *F, const Function *G) { + // We need to recheck everything, but check the things that weren't included + // in the hash first. + + if (F->getAttributes() != G->getAttributes()) + return false; + + if (F->hasGC() != G->hasGC()) + return false; + + if (F->hasGC() && F->getGC() != G->getGC()) + return false; + + if (F->hasSection() != G->hasSection()) + return false; + + if (F->hasSection() && F->getSection() != G->getSection()) + return false; + + if (F->isVarArg() != G->isVarArg()) + return false; + + // TODO: if it's internal and only used in direct calls, we could handle this + // case too. + if (F->getCallingConv() != G->getCallingConv()) + return false; + + if (!isEquivalentType(F->getFunctionType(), G->getFunctionType())) + return false; + + assert(F->arg_size() == G->arg_size() && + "Identical functions have a different number of args."); + + LHS = F; + RHS = G; + + // Visit the arguments so that they get enumerated in the order they're + // passed in. + for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), + fe = F->arg_end(); fi != fe; ++fi, ++gi) { + if (!compare(fi, gi)) + llvm_unreachable("Arguments repeat"); + } + + SmallVector<const BasicBlock *, 8> FBBs, GBBs; + SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F. 
+ FBBs.push_back(&F->getEntryBlock()); + GBBs.push_back(&G->getEntryBlock()); + VisitedBBs.insert(FBBs[0]); + while (!FBBs.empty()) { + const BasicBlock *FBB = FBBs.pop_back_val(); + const BasicBlock *GBB = GBBs.pop_back_val(); + if (!compare(FBB, GBB) || !equals(FBB, GBB)) { + Domains.clear(); + DomainCount.clear(); + return false; + } + const TerminatorInst *FTI = FBB->getTerminator(); + const TerminatorInst *GTI = GBB->getTerminator(); + assert(FTI->getNumSuccessors() == GTI->getNumSuccessors()); + for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(FTI->getSuccessor(i))) + continue; + FBBs.push_back(FTI->getSuccessor(i)); + GBBs.push_back(GTI->getSuccessor(i)); + } + } + + Domains.clear(); + DomainCount.clear(); + return true; +} + +// ===----------------------------------------------------------------------=== +// Folding of functions +// ===----------------------------------------------------------------------=== + +// Cases: +// * F is external strong, G is external strong: +// turn G into a thunk to F (1) +// * F is external strong, G is external weak: +// turn G into a thunk to F (1) +// * F is external weak, G is external weak: +// unfoldable +// * F is external strong, G is internal: +// address of G taken: +// turn G into a thunk to F (1) +// address of G not taken: +// make G an alias to F (2) +// * F is internal, G is external weak +// address of F is taken: +// turn G into a thunk to F (1) +// address of F is not taken: +// make G an alias of F (2) +// * F is internal, G is internal: +// address of F and G are taken: +// turn G into a thunk to F (1) +// address of G is not taken: +// make G an alias to F (2) +// +// alias requires linkage == (external,local,weak) fallback to creating a thunk +// external means 'externally visible' linkage != (internal,private) +// internal means linkage == (internal,private) +// weak means linkage mayBeOverridable +// being external implies that the address is taken +// +// 1. turn G into a thunk to F +// 2. make G an alias to F + +enum LinkageCategory { + ExternalStrong, + ExternalWeak, + Internal +}; + +static LinkageCategory categorize(const Function *F) { + switch (F->getLinkage()) { + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: + return Internal; + + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::ExternalWeakLinkage: + return ExternalWeak; + + case GlobalValue::ExternalLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::AppendingLinkage: + case GlobalValue::DLLImportLinkage: + case GlobalValue::DLLExportLinkage: + case GlobalValue::CommonLinkage: + return ExternalStrong; + } + + llvm_unreachable("Unknown LinkageType."); + return ExternalWeak; +} + +static void ThunkGToF(Function *F, Function *G) { + if (!G->mayBeOverridden()) { + // Redirect direct callers of G to F. 
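Before the thunk construction continues, it may help to restate the two strategies from the case table above in source-level terms. This is a hedged C++ analogy with invented names, not the IR the pass emits: ThunkGToF keeps G as a distinct symbol whose body merely forwards to F, which is why it is safe even when G's address is observable, whereas AliasGToF makes G just another name for F, which is only legal for linkages where that cannot be observed or overridden.

    // Hedged analogy only.
    static int f(int x) { return x + 1; }

    // "Thunk" flavour: g stays a separate function that simply forwards to f,
    // so &g != &f still holds for anyone who took G's address.
    static int g_thunk(int x) { return f(x); }

    // "Alias" flavour (GlobalAlias in the pass): g becomes another name for f.
    // Shown with a GCC/Clang extension purely for illustration:
    //   extern "C" int f_impl(int);
    //   extern "C" int g_alias(int) __attribute__((alias("f_impl")));

    int main() { return g_thunk(41) == f(41) ? 0 : 1; }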
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); + UI != UE;) { + Value::use_iterator TheIter = UI; + ++UI; + CallSite CS(*TheIter); + if (CS && CS.isCallee(TheIter)) + TheIter.getUse().set(BitcastF); + } + } + + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", + G->getParent()); + BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); + + SmallVector<Value *, 16> Args; + unsigned i = 0; + const FunctionType *FFTy = F->getFunctionType(); + for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); + AI != AE; ++AI) { + if (FFTy->getParamType(i) == AI->getType()) { + Args.push_back(AI); + } else { + Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB)); + } + ++i; + } + + CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB); + CI->setTailCall(); + CI->setCallingConv(F->getCallingConv()); + if (NewG->getReturnType()->isVoidTy()) { + ReturnInst::Create(F->getContext(), BB); + } else if (CI->getType() != NewG->getReturnType()) { + Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); + ReturnInst::Create(F->getContext(), BCI, BB); + } else { + ReturnInst::Create(F->getContext(), CI, BB); + } + + NewG->copyAttributesFrom(G); + NewG->takeName(G); + G->replaceAllUsesWith(NewG); + G->eraseFromParent(); +} + +static void AliasGToF(Function *F, Function *G) { + if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage()) + return ThunkGToF(F, G); + + GlobalAlias *GA = new GlobalAlias( + G->getType(), G->getLinkage(), "", + ConstantExpr::getBitCast(F, G->getType()), G->getParent()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); +} + +static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { + Function *F = FnVec[i]; + Function *G = FnVec[j]; + + LinkageCategory catF = categorize(F); + LinkageCategory catG = categorize(G); + + if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(catF, catG); + } + + switch (catF) { + case ExternalStrong: + switch (catG) { + case ExternalStrong: + case ExternalWeak: + ThunkGToF(F, G); + break; + case Internal: + if (G->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + } + break; + + case ExternalWeak: { + assert(catG == ExternalWeak); + + // Make them both thunks to the same internal function. 
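The weak/weak case handled next cannot pick either symbol as the canonical definition, because either one may still be replaced at link time. A rough source-level picture of the shape built below, assuming a GCC/Clang-style weak attribute purely for illustration (in the pass itself the shared body is the renamed, internalized F, and the new function H takes over F's original name and linkage):

    // Sketch only; compiles as a standalone translation unit.
    static int shared_body(int x) { return x * 2; }  // the one internal body

    __attribute__((weak)) int f(int x) { return shared_body(x); } // weak thunk
    __attribute__((weak)) int g(int x) { return shared_body(x); } // weak thunk

    // Overriding f or g in another object file now leaves the other thunk and
    // the shared body unaffected.
    int main() { return f(1) + g(1) == 4 ? 0 : 1; }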
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + F->replaceAllUsesWith(H); + + ThunkGToF(F, G); + ThunkGToF(F, H); + + F->setLinkage(GlobalValue::InternalLinkage); + } break; + + case Internal: + switch (catG) { + case ExternalStrong: + llvm_unreachable(0); + // fall-through + case ExternalWeak: + if (F->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + case Internal: { + bool addrTakenF = F->hasAddressTaken(); + bool addrTakenG = G->hasAddressTaken(); + if (!addrTakenF && addrTakenG) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(addrTakenF, addrTakenG); + } + + if (addrTakenF && addrTakenG) { + ThunkGToF(F, G); + } else { + assert(!addrTakenG); + AliasGToF(F, G); + } + } break; + } break; + } + + ++NumFunctionsMerged; + return true; +} + +// ===----------------------------------------------------------------------=== +// Pass definition +// ===----------------------------------------------------------------------=== + +bool MergeFunctions::runOnModule(Module &M) { + bool Changed = false; + + std::map<unsigned long, std::vector<Function *> > FnMap; + + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) + continue; + + FnMap[hash(F)].push_back(F); + } + + TD = getAnalysisIfAvailable<TargetData>(); + + bool LocalChanged; + do { + LocalChanged = false; + DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); + for (std::map<unsigned long, std::vector<Function *> >::iterator + I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { + std::vector<Function *> &FnVec = I->second; + DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); + + for (int i = 0, e = FnVec.size(); i != e; ++i) { + for (int j = i + 1; j != e; ++j) { + bool isEqual = equals(FnVec[i], FnVec[j]); + + DEBUG(dbgs() << " " << FnVec[i]->getName() + << (isEqual ? " == " : " != ") + << FnVec[j]->getName() << "\n"); + + if (isEqual) { + if (fold(FnVec, i, j)) { + LocalChanged = true; + FnVec.erase(FnVec.begin() + j); + --j, --e; + } + } + } + } + + } + Changed |= LocalChanged; + } while (LocalChanged); + + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp new file mode 100644 index 0000000..07525ea --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -0,0 +1,178 @@ +//===- PartialInlining.cpp - Inline parts of functions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs partial inlining, typically by inlining an if statement +// that surrounds the body of the function. 
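In source-level terms, the transformation implemented below roughly splits a function whose body is guarded by a cheap test into a guard that callers absorb plus an outlined body that is only called on the expensive path. A hedged sketch with invented names (the pass itself works on the entry block's conditional branch and uses ExtractCodeRegion, as the code shows):

    #include <cstdio>

    // Conceptual input to the pass:
    //   void maybe_log(int x) { if (x <= 0) return; ...expensive loop...; }
    // After partial inlining, the cheap guard is inlined into every caller and
    // only the expensive part remains behind a call:

    static void maybe_log_body(int x) {  // the region ExtractCodeRegion outlines
      for (int i = 0; i < x; ++i)
        std::printf("line %d\n", i);
    }

    int main() {
      int x = 3;
      if (x > 0)             // the inlined guard: callers skip the call entirely
        maybe_log_body(x);   // when the test fails
      return 0;
    }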
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "partialinlining" +#include "llvm/Transforms/IPO.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +using namespace llvm; + +STATISTIC(NumPartialInlined, "Number of functions partially inlined"); + +namespace { + struct PartialInliner : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + static char ID; // Pass identification, replacement for typeid + PartialInliner() : ModulePass(&ID) {} + + bool runOnModule(Module& M); + + private: + Function* unswitchFunction(Function* F); + }; +} + +char PartialInliner::ID = 0; +static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner"); + +ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } + +Function* PartialInliner::unswitchFunction(Function* F) { + // First, verify that this function is an unswitching candidate... + BasicBlock* entryBlock = F->begin(); + BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); + if (!BR || BR->isUnconditional()) + return 0; + + BasicBlock* returnBlock = 0; + BasicBlock* nonReturnBlock = 0; + unsigned returnCount = 0; + for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock); + SI != SE; ++SI) + if (isa<ReturnInst>((*SI)->getTerminator())) { + returnBlock = *SI; + returnCount++; + } else + nonReturnBlock = *SI; + + if (returnCount != 1) + return 0; + + // Clone the function, so that we can hack away on it. + DenseMap<const Value*, Value*> ValueMap; + Function* duplicateFunction = CloneFunction(F, ValueMap); + duplicateFunction->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(duplicateFunction); + BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]); + BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]); + BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]); + + // Go ahead and update all uses to the duplicate, so that we can just + // use the inliner functionality when we're done hacking. + F->replaceAllUsesWith(duplicateFunction); + + // Special hackery is needed with PHI nodes that have inputs from more than + // one extracted block. For simplicity, just split the PHIs into a two-level + // sequence of PHIs, some of which will go in the extracted region, and some + // of which will go outside. + BasicBlock* preReturn = newReturnBlock; + newReturnBlock = newReturnBlock->splitBasicBlock( + newReturnBlock->getFirstNonPHI()); + BasicBlock::iterator I = preReturn->begin(); + BasicBlock::iterator Ins = newReturnBlock->begin(); + while (I != preReturn->end()) { + PHINode* OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) break; + + PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins); + OldPhi->replaceAllUsesWith(retPhi); + Ins = newReturnBlock->getFirstNonPHI(); + + retPhi->addIncoming(I, preReturn); + retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), + newEntryBlock); + OldPhi->removeIncomingValue(newEntryBlock); + + ++I; + } + newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock); + + // Gather up the blocks that we're going to extract. 
+ std::vector<BasicBlock*> toExtract; + toExtract.push_back(newNonReturnBlock); + for (Function::iterator FI = duplicateFunction->begin(), + FE = duplicateFunction->end(); FI != FE; ++FI) + if (&*FI != newEntryBlock && &*FI != newReturnBlock && + &*FI != newNonReturnBlock) + toExtract.push_back(FI); + + // The CodeExtractor needs a dominator tree. + DominatorTree DT; + DT.runOnFunction(*duplicateFunction); + + // Extract the body of the if. + Function* extractedFunction = ExtractCodeRegion(DT, toExtract); + + InlineFunctionInfo IFI; + + // Inline the top-level if test into all callers. + std::vector<User*> Users(duplicateFunction->use_begin(), + duplicateFunction->use_end()); + for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); + UI != UE; ++UI) + if (CallInst *CI = dyn_cast<CallInst>(*UI)) + InlineFunction(CI, IFI); + else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) + InlineFunction(II, IFI); + + // Ditch the duplicate, since we're done with it, and rewrite all remaining + // users (function pointers, etc.) back to the original function. + duplicateFunction->replaceAllUsesWith(F); + duplicateFunction->eraseFromParent(); + + ++NumPartialInlined; + + return extractedFunction; +} + +bool PartialInliner::runOnModule(Module& M) { + std::vector<Function*> worklist; + worklist.reserve(M.size()); + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) + if (!FI->use_empty() && !FI->isDeclaration()) + worklist.push_back(&*FI); + + bool changed = false; + while (!worklist.empty()) { + Function* currFunc = worklist.back(); + worklist.pop_back(); + + if (currFunc->use_empty()) continue; + + bool recursive = false; + for (Function::use_iterator UI = currFunc->use_begin(), + UE = currFunc->use_end(); UI != UE; ++UI) + if (Instruction* I = dyn_cast<Instruction>(UI)) + if (I->getParent()->getParent() == currFunc) { + recursive = true; + break; + } + if (recursive) continue; + + + if (Function* newFunc = unswitchFunction(currFunc)) { + worklist.push_back(newFunc); + changed = true; + } + + } + + return changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp new file mode 100644 index 0000000..084b94e --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp @@ -0,0 +1,190 @@ +//===-- PartialSpecialization.cpp - Specialize for common constants--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass finds function arguments that are often a common constant and +// specializes a version of the called function for that constant. +// +// This pass simply does the cloning for functions it specializes. It depends +// on IPSCCP and DAE to clean up the results. +// +// The initial heuristic favors constant arguments that are used in control +// flow. 
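A source-level caricature of the specialization described above, with invented names; the pass only clones and rewires call sites, leaving the branch folding and dead-argument removal to IPSCCP and DAE as the header notes:

    #include <cassert>

    static int compute(int mode, int v) {
      if (mode == 0)        // "mode" is interesting: it feeds control flow
        return v + 1;
      return v * 2;
    }

    // Specialized clone for the frequent constant mode == 0; later passes are
    // expected to fold the dead branch and drop the now-unused parameter.
    static int compute_mode0(int v) {
      return v + 1;
    }

    int main() {
      assert(compute(0, 20) == compute_mode0(20)); // call sites passing the
      return 0;                                    // constant get redirected
    }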
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "partialspecialization" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Support/CallSite.h" +#include "llvm/ADT/DenseSet.h" +#include <map> +using namespace llvm; + +STATISTIC(numSpecialized, "Number of specialized functions created"); + +// Call must be used at least occasionally +static const int CallsMin = 5; + +// Must have 10% of calls having the same constant to specialize on +static const double ConstValPercent = .1; + +namespace { + class PartSpec : public ModulePass { + void scanForInterest(Function&, SmallVector<int, 6>&); + int scanDistribution(Function&, int, std::map<Constant*, int>&); + public : + static char ID; // Pass identification, replacement for typeid + PartSpec() : ModulePass(&ID) {} + bool runOnModule(Module &M); + }; +} + +char PartSpec::ID = 0; +static RegisterPass<PartSpec> +X("partialspecialization", "Partial Specialization"); + +// Specialize F by replacing the arguments (keys) in replacements with the +// constants (values). Replace all calls to F with those constants with +// a call to the specialized function. Returns the specialized function +static Function* +SpecializeFunction(Function* F, + DenseMap<const Value*, Value*>& replacements) { + // arg numbers of deleted arguments + DenseSet<unsigned> deleted; + for (DenseMap<const Value*, Value*>::iterator + repb = replacements.begin(), repe = replacements.end(); + repb != repe; ++repb) + deleted.insert(cast<Argument>(repb->first)->getArgNo()); + + Function* NF = CloneFunction(F, replacements); + NF->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(NF); + + for (Value::use_iterator ii = F->use_begin(), ee = F->use_end(); + ii != ee; ) { + Value::use_iterator i = ii; + ++ii; + if (isa<CallInst>(i) || isa<InvokeInst>(i)) { + CallSite CS(cast<Instruction>(i)); + if (CS.getCalledFunction() == F) { + + SmallVector<Value*, 6> args; + for (unsigned x = 0; x < CS.arg_size(); ++x) + if (!deleted.count(x)) + args.push_back(CS.getArgument(x)); + Value* NCall; + if (CallInst *CI = dyn_cast<CallInst>(i)) { + NCall = CallInst::Create(NF, args.begin(), args.end(), + CI->getName(), CI); + cast<CallInst>(NCall)->setTailCall(CI->isTailCall()); + cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv()); + } else { + InvokeInst *II = cast<InvokeInst>(i); + NCall = InvokeInst::Create(NF, II->getNormalDest(), + II->getUnwindDest(), + args.begin(), args.end(), + II->getName(), II); + cast<InvokeInst>(NCall)->setCallingConv(II->getCallingConv()); + } + CS.getInstruction()->replaceAllUsesWith(NCall); + CS.getInstruction()->eraseFromParent(); + } + } + } + return NF; +} + + +bool PartSpec::runOnModule(Module &M) { + bool Changed = false; + for (Module::iterator I = M.begin(); I != M.end(); ++I) { + Function &F = *I; + if (F.isDeclaration() || F.mayBeOverridden()) continue; + SmallVector<int, 6> interestingArgs; + scanForInterest(F, interestingArgs); + + // Find the first interesting Argument that we can specialize on + // If there are multiple interesting Arguments, then those will be found + // when processing the cloned function. 
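As a concrete reading of the thresholds declared near the top of this file, the decision the loop below makes can be reproduced in isolation. The call counts here are invented for illustration; CallsMin and ConstValPercent are the constants defined above.

    #include <cassert>

    int main() {
      const int total = 40;      // direct calls seen by scanDistribution
      const int sameConst = 7;   // of those, calls passing one particular constant

      const bool specialize =
          total > 5 /*CallsMin*/ &&         // enough calls overall
          total > sameConst &&              // but not every call uses that constant
          sameConst > total * 0.1 /*ConstValPercent*/;  // and it is common enough

      assert(specialize);        // 40 > 5, 40 > 7 and 7 > 4, so a clone is made
      return 0;
    }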
+ bool breakOuter = false; + for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) { + std::map<Constant*, int> distribution; + int total = scanDistribution(F, interestingArgs[x], distribution); + if (total > CallsMin) + for (std::map<Constant*, int>::iterator ii = distribution.begin(), + ee = distribution.end(); ii != ee; ++ii) + if (total > ii->second && ii->first && + ii->second > total * ConstValPercent) { + DenseMap<const Value*, Value*> m; + Function::arg_iterator arg = F.arg_begin(); + for (int y = 0; y < interestingArgs[x]; ++y) + ++arg; + m[&*arg] = ii->first; + SpecializeFunction(&F, m); + ++numSpecialized; + breakOuter = true; + Changed = true; + } + } + } + return Changed; +} + +/// scanForInterest - This function decides which arguments would be worth +/// specializing on. +void PartSpec::scanForInterest(Function& F, SmallVector<int, 6>& args) { + for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end(); + ii != ee; ++ii) { + for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end(); + ui != ue; ++ui) { + + bool interesting = false; + + if (isa<CmpInst>(ui)) interesting = true; + else if (isa<CallInst>(ui)) + interesting = ui->getOperand(0) == ii; + else if (isa<InvokeInst>(ui)) + interesting = ui->getOperand(0) == ii; + else if (isa<SwitchInst>(ui)) interesting = true; + else if (isa<BranchInst>(ui)) interesting = true; + + if (interesting) { + args.push_back(std::distance(F.arg_begin(), ii)); + break; + } + } + } +} + +/// scanDistribution - Construct a histogram of constants for arg of F at arg. +int PartSpec::scanDistribution(Function& F, int arg, + std::map<Constant*, int>& dist) { + bool hasIndirect = false; + int total = 0; + for(Value::use_iterator ii = F.use_begin(), ee = F.use_end(); + ii != ee; ++ii) + if ((isa<CallInst>(ii) || isa<InvokeInst>(ii)) + && ii->getOperand(0) == &F) { + ++dist[dyn_cast<Constant>(ii->getOperand(arg + 1))]; + ++total; + } else + hasIndirect = true; + + // Preserve the original address taken function even if all other uses + // will be specialized. + if (hasIndirect) ++total; + return total; +} + +ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); } diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp new file mode 100644 index 0000000..de6099c --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -0,0 +1,252 @@ +//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple interprocedural pass which walks the +// call-graph, turning invoke instructions into calls, iff the callee cannot +// throw an exception, and marking functions 'nounwind' if they cannot throw. +// It implements this as a bottom-up traversal of the call-graph. 
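A C++-level analogy for the effect described above, purely illustrative: the pass reasons about IR invokes and the nounwind attribute, not C++ exception specifications, and the function name here is invented.

    #include <cstdio>

    // PruneEH can prove this callee never throws (no throwing calls, no unwind
    // in its body), so it is marked nounwind during the bottom-up SCC walk.
    static int parse_digit(char c) {
      return (c >= '0' && c <= '9') ? c - '0' : -1;
    }

    int main() {
      try {
        std::printf("%d\n", parse_digit('7')); // an invoke of a nounwind callee
      } catch (...) {                          // becomes a plain call, and this
        return 1;                              // handler turns into dead code
      }                                        // that can then be removed
      return 0;
    }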
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "prune-eh" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +#include <set> +#include <algorithm> +using namespace llvm; + +STATISTIC(NumRemoved, "Number of invokes removed"); +STATISTIC(NumUnreach, "Number of noreturn calls optimized"); + +namespace { + struct PruneEH : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + PruneEH() : CallGraphSCCPass(&ID) {} + + // runOnSCC - Analyze the SCC, performing the transformation if possible. + bool runOnSCC(CallGraphSCC &SCC); + + bool SimplifyFunction(Function *F); + void DeleteBasicBlock(BasicBlock *BB); + }; +} + +char PruneEH::ID = 0; +static RegisterPass<PruneEH> +X("prune-eh", "Remove unused exception handling info"); + +Pass *llvm::createPruneEHPass() { return new PruneEH(); } + + +bool PruneEH::runOnSCC(CallGraphSCC &SCC) { + SmallPtrSet<CallGraphNode *, 8> SCCNodes; + CallGraph &CG = getAnalysis<CallGraph>(); + bool MadeChange = false; + + // Fill SCCNodes with the elements of the SCC. Used for quickly + // looking up whether a given CallGraphNode is in this SCC. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert(*I); + + // First pass, scan all of the functions in the SCC, simplifying them + // according to what we know. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (Function *F = (*I)->getFunction()) + MadeChange |= SimplifyFunction(F); + + // Next, check to see if any callees might throw or if there are any external + // functions in this SCC: if so, we cannot prune any functions in this SCC. + // Definitions that are weak and not declared non-throwing might be + // overridden at linktime with something that throws, so assume that. + // If this SCC includes the unwind instruction, we KNOW it throws, so + // obviously the SCC might throw. + // + bool SCCMightUnwind = false, SCCMightReturn = false; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); + (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { + Function *F = (*I)->getFunction(); + if (F == 0) { + SCCMightUnwind = true; + SCCMightReturn = true; + } else if (F->isDeclaration() || F->mayBeOverridden()) { + SCCMightUnwind |= !F->doesNotThrow(); + SCCMightReturn |= !F->doesNotReturn(); + } else { + bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow(); + bool CheckReturn = !SCCMightReturn && !F->doesNotReturn(); + + if (!CheckUnwind && !CheckReturn) + continue; + + // Check to see if this function performs an unwind or calls an + // unwinding function. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) { + // Uses unwind! + SCCMightUnwind = true; + } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) { + SCCMightReturn = true; + } + + // Invoke instructions don't allow unwinding to continue, so we are + // only interested in call instructions. 
+ if (CheckUnwind && !SCCMightUnwind) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->doesNotThrow()) { + // This call cannot throw. + } else if (Function *Callee = CI->getCalledFunction()) { + CallGraphNode *CalleeNode = CG[Callee]; + // If the callee is outside our current SCC then we may + // throw because it might. + if (!SCCNodes.count(CalleeNode)) { + SCCMightUnwind = true; + break; + } + } else { + // Indirect call, it might throw. + SCCMightUnwind = true; + break; + } + } + if (SCCMightUnwind && SCCMightReturn) break; + } + } + } + + // If the SCC doesn't unwind or doesn't throw, note this fact. + if (!SCCMightUnwind || !SCCMightReturn) + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Attributes NewAttributes = Attribute::None; + + if (!SCCMightUnwind) + NewAttributes |= Attribute::NoUnwind; + if (!SCCMightReturn) + NewAttributes |= Attribute::NoReturn; + + Function *F = (*I)->getFunction(); + const AttrListPtr &PAL = F->getAttributes(); + const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes); + if (PAL != NPAL) { + MadeChange = true; + F->setAttributes(NPAL); + } + } + + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + // Convert any invoke instructions to non-throwing functions in this node + // into call instructions with a branch. This makes the exception blocks + // dead. + if (Function *F = (*I)->getFunction()) + MadeChange |= SimplifyFunction(F); + } + + return MadeChange; +} + + +// SimplifyFunction - Given information about callees, simplify the specified +// function if we have invokes to non-unwinding functions or code after calls to +// no-return functions. +bool PruneEH::SimplifyFunction(Function *F) { + bool MadeChange = false; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) + if (II->doesNotThrow()) { + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + // Insert a call instruction before the invoke. + CallInst *Call = CallInst::Create(II->getCalledValue(), + Args.begin(), Args.end(), "", II); + Call->takeName(II); + Call->setCallingConv(II->getCallingConv()); + Call->setAttributes(II->getAttributes()); + + // Anything that used the value produced by the invoke instruction + // now uses the value produced by the call instruction. Note that we + // do this even for void functions and calls with no uses so that the + // callgraph edge is updated. + II->replaceAllUsesWith(Call); + BasicBlock *UnwindBlock = II->getUnwindDest(); + UnwindBlock->removePredecessor(II->getParent()); + + // Insert a branch to the normal destination right before the + // invoke. + BranchInst::Create(II->getNormalDest(), II); + + // Finally, delete the invoke instruction! + BB->getInstList().pop_back(); + + // If the unwind block is now dead, nuke it. + if (pred_begin(UnwindBlock) == pred_end(UnwindBlock)) + DeleteBasicBlock(UnwindBlock); // Delete the new BB. + + ++NumRemoved; + MadeChange = true; + } + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) + if (CallInst *CI = dyn_cast<CallInst>(I++)) + if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) { + // This call calls a function that cannot return. Insert an + // unreachable instruction after it and simplify the code. Do this + // by splitting the BB, adding the unreachable, then deleting the + // new BB. 
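For the second simplification, the source-level picture is code that follows a call which can never return. A hedged example with invented names; abort() plays the role of any callee carrying the noreturn attribute:

    #include <cstdio>
    #include <cstdlib>

    static void fatal(const char *msg) { // doesNotReturn() holds for this callee
      std::fprintf(stderr, "fatal: %s\n", msg);
      std::abort();
    }

    int main(int argc, char **argv) {
      if (argc < 2)
        fatal("missing argument"); // everything that would follow this call in
                                   // its basic block is unreachable, which is
                                   // exactly what the block splitting encodes
      std::printf("%s\n", argv[1]);
      return 0;
    }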
+ BasicBlock *New = BB->splitBasicBlock(I); + + // Remove the uncond branch and add an unreachable. + BB->getInstList().pop_back(); + new UnreachableInst(BB->getContext(), BB); + + DeleteBasicBlock(New); // Delete the new BB. + MadeChange = true; + ++NumUnreach; + break; + } + } + + return MadeChange; +} + +/// DeleteBasicBlock - remove the specified basic block from the program, +/// updating the callgraph to reflect any now-obsolete edges due to calls that +/// exist in the BB. +void PruneEH::DeleteBasicBlock(BasicBlock *BB) { + assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!"); + CallGraph &CG = getAnalysis<CallGraph>(); + + CallGraphNode *CGN = CG[BB->getParent()]; + for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) { + --I; + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (!isa<DbgInfoIntrinsic>(I)) + CGN->removeCallEdgeFor(CI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + CGN->removeCallEdgeFor(II); + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + } + + // Get the list of successors of this block. + std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB)); + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + Succs[i]->removePredecessor(BB); + + BB->eraseFromParent(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp new file mode 100644 index 0000000..4566a76 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -0,0 +1,71 @@ +//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass loops over all of the functions in the input module, looking for +// dead declarations and removes them. Dead declarations are declarations of +// functions for which no implementation is available (i.e., declarations for +// unused library functions). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "strip-dead-prototypes" +#include "llvm/Transforms/IPO.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed"); + +namespace { + +/// @brief Pass to remove unused function declarations. +class StripDeadPrototypesPass : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + StripDeadPrototypesPass() : ModulePass(&ID) { } + virtual bool runOnModule(Module &M); +}; + +} // end anonymous namespace + +char StripDeadPrototypesPass::ID = 0; +static RegisterPass<StripDeadPrototypesPass> +X("strip-dead-prototypes", "Strip Unused Function Prototypes"); + +bool StripDeadPrototypesPass::runOnModule(Module &M) { + bool MadeChange = false; + + // Erase dead function prototypes. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + Function *F = I++; + // Function must be a prototype and unused. + if (F->isDeclaration() && F->use_empty()) { + F->eraseFromParent(); + ++NumDeadPrototypes; + MadeChange = true; + } + } + + // Erase dead global var prototypes. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ) { + GlobalVariable *GV = I++; + // Global must be a prototype and unused. 
+ if (GV->isDeclaration() && GV->use_empty()) + GV->eraseFromParent(); + } + + // Return an indication of whether we changed anything or not. + return MadeChange; +} + +ModulePass *llvm::createStripDeadPrototypesPass() { + return new StripDeadPrototypesPass(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp new file mode 100644 index 0000000..6bc8e66 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -0,0 +1,297 @@ +//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The StripSymbols transformation implements code stripping. Specifically, it +// can delete: +// +// * names for virtual registers +// * symbols for internal globals and functions +// * debug information +// +// Note that this transformation makes code much less readable, so it should +// only be used in situations where the 'strip' utility would be used, such as +// reducing code size or making it harder to reverse engineer code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +namespace { + class StripSymbols : public ModulePass { + bool OnlyDebugInfo; + public: + static char ID; // Pass identification, replacement for typeid + explicit StripSymbols(bool ODI = false) + : ModulePass(&ID), OnlyDebugInfo(ODI) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; + + class StripNonDebugSymbols : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripNonDebugSymbols() + : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; + + class StripDebugDeclare : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDebugDeclare() + : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char StripSymbols::ID = 0; +static RegisterPass<StripSymbols> +X("strip", "Strip all symbols from a module"); + +ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { + return new StripSymbols(OnlyDebugInfo); +} + +char StripNonDebugSymbols::ID = 0; +static RegisterPass<StripNonDebugSymbols> +Y("strip-nondebug", "Strip all symbols, except dbg symbols, from a module"); + +ModulePass *llvm::createStripNonDebugSymbolsPass() { + return new StripNonDebugSymbols(); +} + +char StripDebugDeclare::ID = 0; +static RegisterPass<StripDebugDeclare> +Z("strip-debug-declare", "Strip all llvm.dbg.declare intrinsics"); + +ModulePass *llvm::createStripDebugDeclarePass() { + return new StripDebugDeclare(); +} + +/// OnlyUsedBy - Return true if V is only used by Usr. 
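The create* entry points registered above are the intended programmatic interface to these passes. A minimal sketch of driving them over a Module, assuming the LLVM headers of this vintage (llvm/PassManager.h, with the creators exposed through llvm/Transforms/IPO.h as was the convention); stripModule is an invented helper name.

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"
    using namespace llvm;

    // Sketch only: strip symbol names, llvm.dbg.declare calls, and dead
    // prototypes from an existing module.
    static void stripModule(Module &M) {
      PassManager PM;
      PM.add(createStripSymbolsPass(/*OnlyDebugInfo=*/false));
      PM.add(createStripDebugDeclarePass());
      PM.add(createStripDeadPrototypesPass());
      PM.run(M);
    }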
+static bool OnlyUsedBy(Value *V, Value *Usr) { + for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { + User *U = *I; + if (U != Usr) + return false; + } + return true; +} + +static void RemoveDeadConstant(Constant *C) { + assert(C->use_empty() && "Constant is not dead!"); + SmallPtrSet<Constant*, 4> Operands; + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) + if (isa<DerivedType>(C->getOperand(i)->getType()) && + OnlyUsedBy(C->getOperand(i), C)) + Operands.insert(cast<Constant>(C->getOperand(i))); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { + if (!GV->hasLocalLinkage()) return; // Don't delete non static globals. + GV->eraseFromParent(); + } + else if (!isa<Function>(C)) + if (isa<CompositeType>(C->getType())) + C->destroyConstant(); + + // If the constant referenced anything, see if we can delete it as well. + for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(), + OE = Operands.end(); OI != OE; ++OI) + RemoveDeadConstant(*OI); +} + +// Strip the symbol table of its names. +// +static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { + for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) { + Value *V = VI->getValue(); + ++VI; + if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) { + if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg")) + // Set name to "", removing from symbol table! + V->setName(""); + } + } +} + +// Strip the symbol table of its names. +static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) { + for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) { + if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg")) + ++TI; + else + ST.remove(TI++); + } +} + +/// Find values that are marked as llvm.used. +static void findUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSet<const GlobalValue*, 8> &UsedValues) { + if (LLVMUsed == 0) return; + UsedValues.insert(LLVMUsed); + + ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); + if (Inits == 0) return; + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); +} + +/// StripSymbolNames - Strip symbol names. +static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { + + SmallPtrSet<const GlobalValue*, 8> llvmUsedValues; + findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues); + findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues); + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) + I->setName(""); // Internal symbols can't participate in linkage + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) + I->setName(""); // Internal symbols can't participate in linkage + StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); + } + + // Remove all names from types. + StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo); + + return true; +} + +// StripDebugInfo - Strip debug info in the module if it exists. +// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and +// llvm.dbg.region.end calls, and any globals they point to if now dead. 
+static bool StripDebugInfo(Module &M) { + + bool Changed = false; + + // Remove all of the calls to the debugger intrinsics, and remove them from + // the module. + if (Function *Declare = M.getFunction("llvm.dbg.declare")) { + while (!Declare->use_empty()) { + CallInst *CI = cast<CallInst>(Declare->use_back()); + CI->eraseFromParent(); + } + Declare->eraseFromParent(); + Changed = true; + } + + if (Function *DbgVal = M.getFunction("llvm.dbg.value")) { + while (!DbgVal->use_empty()) { + CallInst *CI = cast<CallInst>(DbgVal->use_back()); + CI->eraseFromParent(); + } + DbgVal->eraseFromParent(); + Changed = true; + } + + NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); + if (NMD) { + Changed = true; + NMD->eraseFromParent(); + } + + NMD = M.getNamedMetadata("llvm.dbg.lv"); + if (NMD) { + Changed = true; + NMD->eraseFromParent(); + } + + unsigned MDDbgKind = M.getMDKindID("dbg"); + for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) + for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; + ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; + ++BI) + BI->setMetadata(MDDbgKind, 0); + + return true; +} + +bool StripSymbols::runOnModule(Module &M) { + bool Changed = false; + Changed |= StripDebugInfo(M); + if (!OnlyDebugInfo) + Changed |= StripSymbolNames(M, false); + return Changed; +} + +bool StripNonDebugSymbols::runOnModule(Module &M) { + return StripSymbolNames(M, true); +} + +bool StripDebugDeclare::runOnModule(Module &M) { + + Function *Declare = M.getFunction("llvm.dbg.declare"); + std::vector<Constant*> DeadConstants; + + if (Declare) { + while (!Declare->use_empty()) { + CallInst *CI = cast<CallInst>(Declare->use_back()); + Value *Arg1 = CI->getOperand(1); + Value *Arg2 = CI->getOperand(2); + assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); + CI->eraseFromParent(); + if (Arg1->use_empty()) { + if (Constant *C = dyn_cast<Constant>(Arg1)) + DeadConstants.push_back(C); + else + RecursivelyDeleteTriviallyDeadInstructions(Arg1); + } + if (Arg2->use_empty()) + if (Constant *C = dyn_cast<Constant>(Arg2)) + DeadConstants.push_back(C); + } + Declare->eraseFromParent(); + } + + while (!DeadConstants.empty()) { + Constant *C = DeadConstants.back(); + DeadConstants.pop_back(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { + if (GV->hasLocalLinkage()) + RemoveDeadConstant(GV); + } else + RemoveDeadConstant(C); + } + + return true; +} diff --git a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp new file mode 100644 index 0000000..473e83c --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp @@ -0,0 +1,364 @@ +//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass finds functions that return a struct (using a pointer to the struct +// as the first argument of the function, marked with the 'sret' attribute) and +// replaces them with a new function that simply returns each of the elements of +// that struct (using multiple return values). +// +// This pass works under a number of conditions: +// 1. The returned struct must not contain other structs +// 2. The returned struct must only be used to load values from +// 3. 
The placeholder struct passed in is the result of an alloca +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sretpromotion" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses"); +STATISTIC(NumSRET , "Number of sret promoted"); +namespace { + /// SRETPromotion - This pass removes sret parameter and updates + /// function to use multiple return value. + /// + struct SRETPromotion : public CallGraphSCCPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + CallGraphSCCPass::getAnalysisUsage(AU); + } + + virtual bool runOnSCC(CallGraphSCC &SCC); + static char ID; // Pass identification, replacement for typeid + SRETPromotion() : CallGraphSCCPass(&ID) {} + + private: + CallGraphNode *PromoteReturn(CallGraphNode *CGN); + bool isSafeToUpdateAllCallers(Function *F); + Function *cloneFunctionBody(Function *F, const StructType *STy); + CallGraphNode *updateCallSites(Function *F, Function *NF); + bool nestedStructType(const StructType *STy); + }; +} + +char SRETPromotion::ID = 0; +static RegisterPass<SRETPromotion> +X("sretpromotion", "Promote sret arguments to multiple ret values"); + +Pass *llvm::createStructRetPromotionPass() { + return new SRETPromotion(); +} + +bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) { + bool Changed = false; + + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (CallGraphNode *NewNode = PromoteReturn(*I)) { + SCC.ReplaceNode(*I, NewNode); + Changed = true; + } + + return Changed; +} + +/// PromoteReturn - This method promotes function that uses StructRet paramater +/// into a function that uses multiple return values. +CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { + Function *F = CGN->getFunction(); + + if (!F || F->isDeclaration() || !F->hasLocalLinkage()) + return 0; + + // Make sure that function returns struct. + if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) + return 0; + + DEBUG(dbgs() << "SretPromotion: Looking at sret function " + << F->getName() << "\n"); + + assert(F->getReturnType()->isVoidTy() && "Invalid function return type"); + Function::arg_iterator AI = F->arg_begin(); + const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType()); + assert(FArgType && "Invalid sret parameter type"); + const llvm::StructType *STy = + dyn_cast<StructType>(FArgType->getElementType()); + assert(STy && "Invalid sret parameter element type"); + + // Check if it is ok to perform this promotion. 
+ if (isSafeToUpdateAllCallers(F) == false) { + DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); + NumRejectedSRETUses++; + return 0; + } + + DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); + NumSRET++; + // [1] Replace use of sret parameter + AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", + F->getEntryBlock().begin()); + Value *NFirstArg = F->arg_begin(); + NFirstArg->replaceAllUsesWith(TheAlloca); + + // [2] Find and replace ret instructions + for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) + for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { + Instruction *I = BI; + ++BI; + if (isa<ReturnInst>(I)) { + Value *NV = new LoadInst(TheAlloca, "mrv.ld", I); + ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I); + I->replaceAllUsesWith(NR); + I->eraseFromParent(); + } + } + + // [3] Create the new function body and insert it into the module. + Function *NF = cloneFunctionBody(F, STy); + + // [4] Update all call sites to use new function + CallGraphNode *NF_CFN = updateCallSites(F, NF); + + CallGraph &CG = getAnalysis<CallGraph>(); + NF_CFN->stealCalledFunctionsFrom(CG[F]); + + delete CG.removeFunctionFromModule(F); + return NF_CFN; +} + +// Check if it is ok to perform this promotion. +bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { + + if (F->use_empty()) + // No users. OK to modify signature. + return true; + + for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end(); + FnUseI != FnUseE; ++FnUseI) { + // The function is passed in as an argument to (possibly) another function, + // we can't change it! + CallSite CS = CallSite::get(*FnUseI); + Instruction *Call = CS.getInstruction(); + // The function is used by something else than a call or invoke instruction, + // we can't change it! + if (!Call || !CS.isCallee(FnUseI)) + return false; + CallSite::arg_iterator AI = CS.arg_begin(); + Value *FirstArg = *AI; + + if (!isa<AllocaInst>(FirstArg)) + return false; + + // Check FirstArg's users. + for (Value::use_iterator ArgI = FirstArg->use_begin(), + ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) { + + // If FirstArg user is a CallInst that does not correspond to current + // call site then this function F is not suitable for sret promotion. + if (CallInst *CI = dyn_cast<CallInst>(ArgI)) { + if (CI != Call) + return false; + } + // If FirstArg user is a GEP whose all users are not LoadInst then + // this function F is not suitable for sret promotion. + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(ArgI)) { + // TODO : Use dom info and insert PHINodes to collect get results + // from multiple call sites for this GEP. + if (GEP->getParent() != Call->getParent()) + return false; + for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end(); + GEPI != GEPE; ++GEPI) + if (!isa<LoadInst>(GEPI)) + return false; + } + // Any other FirstArg users make this function unsuitable for sret + // promotion. + else + return false; + } + } + + return true; +} + +/// cloneFunctionBody - Create a new function based on F and +/// insert it into module. Remove first argument. Use STy as +/// the return type for new function. +Function *SRETPromotion::cloneFunctionBody(Function *F, + const StructType *STy) { + + const FunctionType *FTy = F->getFunctionType(); + std::vector<const Type*> Params; + + // Attributes - Keep track of the parameter attributes for the arguments. 
+ SmallVector<AttributeWithIndex, 8> AttributesVec; + const AttrListPtr &PAL = F->getAttributes(); + + // Add any return attributes. + if (Attributes attrs = PAL.getRetAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); + + // Skip first argument. + Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + ++I; + // 0th parameter attribute is reserved for return type. + // 1th parameter attribute is for first 1st sret argument. + unsigned ParamIndex = 2; + while (I != E) { + Params.push_back(I->getType()); + if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) + AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); + ++I; + ++ParamIndex; + } + + // Add any fn attributes. + if (Attributes attrs = PAL.getFnAttributes()) + AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); + + + FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg()); + Function *NF = Function::Create(NFTy, F->getLinkage()); + NF->takeName(F); + NF->copyAttributesFrom(F); + NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); + F->getParent()->getFunctionList().insert(F, NF); + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Replace arguments + I = F->arg_begin(); + E = F->arg_end(); + Function::arg_iterator NI = NF->arg_begin(); + ++I; + while (I != E) { + I->replaceAllUsesWith(NI); + NI->takeName(I); + ++I; + ++NI; + } + + return NF; +} + +/// updateCallSites - Update all sites that call F to use NF. +CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { + CallGraph &CG = getAnalysis<CallGraph>(); + SmallVector<Value*, 16> Args; + + // Attributes - Keep track of the parameter attributes for the arguments. + SmallVector<AttributeWithIndex, 8> ArgAttrsVec; + + // Get a new callgraph node for NF. + CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + + while (!F->use_empty()) { + CallSite CS = CallSite::get(*F->use_begin()); + Instruction *Call = CS.getInstruction(); + + const AttrListPtr &PAL = F->getAttributes(); + // Add any return attributes. + if (Attributes attrs = PAL.getRetAttributes()) + ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs)); + + // Copy arguments, however skip first one. + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + Value *FirstCArg = *AI; + ++AI; + // 0th parameter attribute is reserved for return type. + // 1th parameter attribute is for first 1st sret argument. + unsigned ParamIndex = 2; + while (AI != AE) { + Args.push_back(*AI); + if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) + ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); + ++ParamIndex; + ++AI; + } + + // Add any function attributes. + if (Attributes attrs = PAL.getFnAttributes()) + ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs)); + + AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end()); + + // Build new call instruction. 
+ Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + cast<InvokeInst>(New)->setAttributes(NewPAL); + } else { + New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + cast<CallInst>(New)->setAttributes(NewPAL); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + Args.clear(); + ArgAttrsVec.clear(); + New->takeName(Call); + + // Update the callgraph to know that the callsite has been transformed. + CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->removeCallEdgeFor(Call); + CalleeNode->addCalledFunction(New, NF_CGN); + + // Update all users of sret parameter to extract value using extractvalue. + for (Value::use_iterator UI = FirstCArg->use_begin(), + UE = FirstCArg->use_end(); UI != UE; ) { + User *U2 = *UI++; + CallInst *C2 = dyn_cast<CallInst>(U2); + if (C2 && (C2 == Call)) + continue; + + GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2); + ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2)); + Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), + "evi", UGEP); + while(!UGEP->use_empty()) { + // isSafeToUpdateAllCallers has checked that all GEP uses are + // LoadInsts + LoadInst *L = cast<LoadInst>(*UGEP->use_begin()); + L->replaceAllUsesWith(GR); + L->eraseFromParent(); + } + UGEP->eraseFromParent(); + continue; + } + Call->eraseFromParent(); + } + + return NF_CGN; +} + +/// nestedStructType - Return true if STy includes any +/// other aggregate types +bool SRETPromotion::nestedStructType(const StructType *STy) { + unsigned Num = STy->getNumElements(); + for (unsigned i = 0; i < Num; i++) { + const Type *Ty = STy->getElementType(i); + if (!Ty->isSingleValueType() && !Ty->isVoidTy()) + return true; + } + return false; +} |
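To close the section on SRETPromotion, a source-level before/after of what the pass accomplishes. This is ordinary C++ with invented names, standing in for the IR-level sret argument, the caller's alloca, and the extractvalue rewrites performed in updateCallSites:

    #include <cassert>

    struct Pair { int first, second; };

    // Before: the struct is "returned" through a hidden sret pointer parameter
    // that every caller backs with an alloca.
    static void makePairSret(Pair *sretResult, int a, int b) {
      sretResult->first = a;
      sretResult->second = b;
    }

    // After: the callee returns the aggregate directly (multiple return values
    // at the IR level); callers read the pieces with extractvalue instead of
    // loading through the temporary.
    static Pair makePair(int a, int b) {
      Pair P;
      P.first = a;
      P.second = b;
      return P;
    }

    int main() {
      Pair Tmp;                   // the caller-side alloca the pass eliminates
      makePairSret(&Tmp, 1, 2);
      Pair Q = makePair(1, 2);
      assert(Tmp.first == Q.first && Tmp.second == Q.second);
      return 0;
    }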