diff options
author | dim <dim@FreeBSD.org> | 2011-02-20 12:57:14 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2011-02-20 12:57:14 +0000 |
commit | cbb70ce070d220642b038ea101d9c0f9fbf860d6 (patch) | |
tree | d2b61ce94e654cb01a254d2195259db5f9cc3f3c /lib/Transforms/IPO | |
parent | 4ace901e87dac5bbbac78ed325e75462e48e386e (diff) | |
download | FreeBSD-src-cbb70ce070d220642b038ea101d9c0f9fbf860d6.zip FreeBSD-src-cbb70ce070d220642b038ea101d9c0f9fbf860d6.tar.gz |
Vendor import of llvm trunk r126079:
http://llvm.org/svn/llvm-project/llvm/trunk@126079
Diffstat (limited to 'lib/Transforms/IPO')
24 files changed, 1375 insertions, 998 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 0c77e1f..0c650cf 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -39,7 +39,6 @@ #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Target/TargetData.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -67,7 +66,9 @@ namespace { virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) {} + : CallGraphSCCPass(ID), maxElements(maxElements) { + initializeArgPromotionPass(*PassRegistry::getPassRegistry()); + } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; @@ -84,8 +85,12 @@ namespace { } char ArgPromotion::ID = 0; -INITIALIZE_PASS(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, false); +INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false) Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); @@ -130,47 +135,74 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { if (PointerArgs.empty()) return 0; // Second check: make sure that all callers are direct callers. We can't - // transform functions that have indirect callers. - if (F->hasAddressTaken()) - return 0; - + // transform functions that have indirect callers. Also see if the function + // is self-recursive. + bool isSelfRecursive = false; + for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); + UI != E; ++UI) { + CallSite CS(*UI); + // Must be a direct call. + if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0; + + if (CS.getInstruction()->getParent()->getParent() == F) + isSelfRecursive = true; + } + // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; SmallPtrSet<Argument*, 8> ByValArgsToTransform; for (unsigned i = 0; i != PointerArgs.size(); ++i) { bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal); + Argument *PtrArg = PointerArgs[i].first; + const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe. - Argument *PtrArg = PointerArgs[i].first; if (isByVal) { - const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); if (const StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" << " than " << maxElements << " arguments to the function.\n"); - } else { - // If all the elements are single-value types, we can promote it. - bool AllSimple = true; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - if (!STy->getElementType(i)->isSingleValueType()) { - AllSimple = false; - break; - } - - // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow scalarrepl to hack on - // the new alloca we introduce. - if (AllSimple) { - ByValArgsToTransform.insert(PtrArg); - continue; + continue; + } + + // If all the elements are single-value types, we can promote it. + bool AllSimple = true; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + if (!STy->getElementType(i)->isSingleValueType()) { + AllSimple = false; + break; } } + + // Safe to transform, don't even bother trying to "promote" it. + // Passing the elements as a scalar will allow scalarrepl to hack on + // the new alloca we introduce. + if (AllSimple) { + ByValArgsToTransform.insert(PtrArg); + continue; + } } } + // If the argument is a recursive type and we're in a recursive + // function, we could end up infinitely peeling the function argument. + if (isSelfRecursive) { + if (const StructType *STy = dyn_cast<StructType>(AgTy)) { + bool RecursiveType = false; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + if (STy->getElementType(i) == PtrArg->getType()) { + RecursiveType = true; + break; + } + } + if (RecursiveType) + continue; + } + } + // Otherwise, see if we can promote the pointer to its value. if (isSafeToPromoteArgument(PtrArg, isByVal)) ArgsToPromote.insert(PtrArg); @@ -183,22 +215,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); } -/// IsAlwaysValidPointer - Return true if the specified pointer is always legal -/// to load. -static bool IsAlwaysValidPointer(Value *V) { - if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true; - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) - return IsAlwaysValidPointer(GEP->getOperand(0)); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::GetElementPtr) - return IsAlwaysValidPointer(CE->getOperand(0)); - - return false; -} - -/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that +/// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { Function *Callee = Arg->getParent(); unsigned ArgNo = std::distance(Callee->arg_begin(), @@ -211,7 +230,7 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) { CallSite CS(*UI); assert(CS && "Should only have direct calls!"); - if (!IsAlwaysValidPointer(CS.getArgument(ArgNo))) + if (!CS.getArgument(ArgNo)->isDereferenceablePointer()) return false; } return true; @@ -318,7 +337,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByVal || AllCalleesPassInValidPointerForArgument(Arg)) + if (isByVal || AllCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -434,8 +453,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { SmallPtrSet<BasicBlock*, 16> TranspBlocks; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - TargetData *TD = getAnalysisIfAvailable<TargetData>(); - if (!TD) return false; // Without TargetData, assume the worst. for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to @@ -443,11 +460,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); - const PointerType *LoadTy = - cast<PointerType>(Load->getPointerOperand()->getType()); - unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType()); - - if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) + AliasAnalysis::Location Loc = AA.getLocation(Load); + if (AA.canInstructionRangeModify(BB->front(), *Load, Loc)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. @@ -458,7 +472,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > I = idf_ext_begin(P, TranspBlocks), E = idf_ext_end(P, TranspBlocks); I != E; ++I) - if (AA.canBasicBlockModify(**I, Arg, LoadSize)) + if (AA.canBasicBlockModify(**I, Loc)) return false; } } @@ -694,6 +708,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); + // Transfer the TBAA info too. + newLoad->setMetadata(LLVMContext::MD_tbaa, + OrigLoad->getMetadata(LLVMContext::MD_tbaa)); Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 65483e8..efdeec5 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -17,11 +17,8 @@ add_llvm_library(LLVMipo LowerSetJmp.cpp MergeFunctions.cpp PartialInlining.cpp - PartialSpecialization.cpp PruneEH.cpp StripDeadPrototypes.cpp StripSymbols.cpp StructRetPromotion.cpp ) - -target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine) diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 64e8d79..a21efce 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -33,7 +33,9 @@ STATISTIC(NumMerged, "Number of global constants merged"); namespace { struct ConstantMerge : public ModulePass { static char ID; // Pass identification, replacement for typeid - ConstantMerge() : ModulePass(ID) {} + ConstantMerge() : ModulePass(ID) { + initializeConstantMergePass(*PassRegistry::getPassRegistry()); + } // run - For this pass, process all of the globals in the module, // eliminating duplicate constants. @@ -44,7 +46,7 @@ namespace { char ConstantMerge::ID = 0; INITIALIZE_PASS(ConstantMerge, "constmerge", - "Merge Duplicate Global Constants", false, false); + "Merge Duplicate Global Constants", false, false) ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } @@ -63,6 +65,18 @@ static void FindUsedValues(GlobalVariable *LLVMUsed, UsedValues.insert(GV); } +// True if A is better than B. +static bool IsBetterCannonical(const GlobalVariable &A, + const GlobalVariable &B) { + if (!A.hasLocalLinkage() && B.hasLocalLinkage()) + return true; + + if (A.hasLocalLinkage() && !B.hasLocalLinkage()) + return false; + + return A.hasUnnamedAddr(); +} + bool ConstantMerge::runOnModule(Module &M) { // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; @@ -83,44 +97,76 @@ bool ConstantMerge::runOnModule(Module &M) { // second level constants have initializers which point to the globals that // were just merged. while (1) { - // First pass: identify all globals that can be merged together, filling in - // the Replacements vector. We cannot do the replacement in this pass - // because doing so may cause initializers of other globals to be rewritten, - // invalidating the Constant* pointers in CMap. - // + + // First: Find the canonical constants others will be merged with. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; - + // If this GV is dead, remove it. GV->removeDeadConstantUsers(); if (GV->use_empty() && GV->hasLocalLinkage()) { GV->eraseFromParent(); continue; } - - // Only process constants with initializers in the default addres space. - if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || - GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() || + + // Only process constants with initializers in the default address space. + if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || GV->hasSection() || // Don't touch values marked with attribute(used). UsedGlobals.count(GV)) continue; - - - + Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. GlobalVariable *&Slot = CMap[Init]; - if (Slot == 0) { // Nope, add it to the map. + // If this is the first constant we find or if the old on is local, + // replace with the current one. It the current is externally visible + // it cannot be replace, but can be the canonical constant we merge with. + if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) { Slot = GV; - } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! - // Make all uses of the duplicate constant use the canonical version. - Replacements.push_back(std::make_pair(GV, Slot)); } } + // Second: identify all globals that can be merged together, filling in + // the Replacements vector. We cannot do the replacement in this pass + // because doing so may cause initializers of other globals to be rewritten, + // invalidating the Constant* pointers in CMap. + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = GVI++; + + // Only process constants with initializers in the default address space. + if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || GV->hasSection() || + // Don't touch values marked with attribute(used). + UsedGlobals.count(GV)) + continue; + + // We can only replace constant with local linkage. + if (!GV->hasLocalLinkage()) + continue; + + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *Slot = CMap[Init]; + + if (!Slot || Slot == GV) + continue; + + if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr()) + continue; + + if (!GV->hasUnnamedAddr()) + Slot->setUnnamedAddr(false); + + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); + } + if (Replacements.empty()) return MadeChange; CMap.clear(); @@ -133,6 +179,8 @@ bool ConstantMerge::runOnModule(Module &M) { Replacements[i].first->replaceAllUsesWith(Replacements[i].second); // Delete the global value from the module. + assert(Replacements[i].first->hasLocalLinkage() && + "Refusing to delete an externally visible global variable."); Replacements[i].first->eraseFromParent(); } diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 47df235..b423221 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -39,7 +39,8 @@ using namespace llvm; STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); - +STATISTIC(NumArgumentsReplacedWithUndef, + "Number of unread args replaced with undef"); namespace { /// DAE - The dead argument elimination pass. /// @@ -126,7 +127,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - DAE() : ModulePass(ID) {} + DAE() : ModulePass(ID) { + initializeDAEPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); @@ -146,12 +149,13 @@ namespace { void PropagateLiveness(const RetOrArg &RA); bool RemoveDeadStuffFromFunction(Function *F); bool DeleteDeadVarargs(Function &Fn); + bool RemoveDeadArgumentsFromCallers(Function &Fn); }; } char DAE::ID = 0; -INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false); +INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false) namespace { /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but @@ -168,7 +172,7 @@ namespace { char DAH::ID = 0; INITIALIZE_PASS(DAH, "deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", - false, false); + false, false) /// createDeadArgEliminationPass - This pass removes arguments from functions /// which are not used by the body of the function. @@ -285,6 +289,55 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { return true; } +/// RemoveDeadArgumentsFromCallers - Checks if the given function has any +/// arguments that are unused, and changes the caller parameters to be undefined +/// instead. +bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) +{ + if (Fn.isDeclaration()) + return false; + + // Functions with local linkage should already have been handled. + if (Fn.hasLocalLinkage()) + return false; + + if (Fn.use_empty()) + return false; + + llvm::SmallVector<unsigned, 8> UnusedArgs; + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); + I != E; ++I) { + Argument *Arg = I; + + if (Arg->use_empty() && !Arg->hasByValAttr()) + UnusedArgs.push_back(Arg->getArgNo()); + } + + if (UnusedArgs.empty()) + return false; + + bool Changed = false; + + for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end(); + I != E; ++I) { + CallSite CS(*I); + if (!CS || !CS.isCallee(I)) + continue; + + // Now go through all unused args and replace them with "undef". + for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) { + unsigned ArgNo = UnusedArgs[I]; + + Value *Arg = CS.getArgument(ArgNo); + CS.setArgument(ArgNo, UndefValue::get(Arg->getType())); + ++NumArgumentsReplacedWithUndef; + Changed = true; + } + } + + return Changed; +} + /// Convenience function that returns the number of return values. It returns 0 /// for void functions and 1 for functions not returning a struct. It returns /// the number of struct elements for functions returning a struct. @@ -791,7 +844,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } else if (New->getType()->isVoidTy()) { // Our return value has uses, but they will get removed later on. // Replace by null for now. - Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + if (!Call->getType()->isX86_MMXTy()) + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); } else { assert(RetTy->isStructTy() && "Return type changed, but not into a void. The old return type" @@ -854,7 +908,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } else { // If this argument is dead, replace any uses of it with null constants // (these are guaranteed to become unused later on). - I->replaceAllUsesWith(Constant::getNullValue(I->getType())); + if (!I->getType()->isX86_MMXTy()) + I->replaceAllUsesWith(Constant::getNullValue(I->getType())); } // If we change the return value of the function we must rewrite any return @@ -935,5 +990,14 @@ bool DAE::runOnModule(Module &M) { Function *F = I++; Changed |= RemoveDeadStuffFromFunction(F); } + + // Finally, look for any unused parameters in functions with non-local + // linkage and replace the passed in parameters with undef. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function& F = *I; + + Changed |= RemoveDeadArgumentsFromCallers(F); + } + return Changed; } diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp index 5dc50c5..a509931 100644 --- a/lib/Transforms/IPO/DeadTypeElimination.cpp +++ b/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -26,7 +26,9 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab"); namespace { struct DTE : public ModulePass { static char ID; // Pass identification, replacement for typeid - DTE() : ModulePass(ID) {} + DTE() : ModulePass(ID) { + initializeDTEPass(*PassRegistry::getPassRegistry()); + } // doPassInitialization - For this pass, it removes global symbol table // entries for primitive types. These are never used for linking in GCC and @@ -45,7 +47,10 @@ namespace { } char DTE::ID = 0; -INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false); +INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination", + false, false) +INITIALIZE_PASS_DEPENDENCY(FindUsedTypes) +INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false) ModulePass *llvm::createDeadTypeEliminationPass() { return new DTE(); diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 45c5fe7..9d432de 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -50,24 +50,22 @@ namespace { // Visit the GlobalVariables. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration()) { - if (I->hasLocalLinkage()) - I->setVisibility(GlobalValue::HiddenVisibility); - I->setLinkage(GlobalValue::ExternalLinkage); - if (deleteStuff == Named.count(I)) - I->setInitializer(0); - } + I != E; ++I) { + if (I->hasLocalLinkage()) + I->setVisibility(GlobalValue::HiddenVisibility); + I->setLinkage(GlobalValue::ExternalLinkage); + if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) + I->setInitializer(0); + } // Visit the Functions. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration()) { - if (I->hasLocalLinkage()) - I->setVisibility(GlobalValue::HiddenVisibility); - I->setLinkage(GlobalValue::ExternalLinkage); - if (deleteStuff == Named.count(I)) - I->deleteBody(); - } + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->hasLocalLinkage()) + I->setVisibility(GlobalValue::HiddenVisibility); + I->setLinkage(GlobalValue::ExternalLinkage); + if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) + I->deleteBody(); + } return true; } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 6165ba0..95decec 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -23,10 +23,10 @@ #include "llvm/CallGraphSCCPass.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" @@ -41,7 +41,9 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias"); namespace { struct FunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID) {} + FunctionAttrs() : CallGraphSCCPass(ID), AA(0) { + initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } // runOnSCC - Analyze the SCC, performing the transformation if possible. bool runOnSCC(CallGraphSCC &SCC); @@ -61,67 +63,25 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); CallGraphSCCPass::getAnalysisUsage(AU); } - bool PointsToLocalMemory(Value *V); + private: + AliasAnalysis *AA; }; } char FunctionAttrs::ID = 0; -INITIALIZE_PASS(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false); +INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } -/// PointsToLocalMemory - Returns whether the given pointer value points to -/// memory that is local to the function. Global constants are considered -/// local to all functions. -bool FunctionAttrs::PointsToLocalMemory(Value *V) { - SmallVector<Value*, 16> Worklist; - unsigned MaxLookup = 8; - - Worklist.push_back(V); - - do { - V = Worklist.pop_back_val()->getUnderlyingObject(); - - // An alloca instruction defines local memory. - if (isa<AllocaInst>(V)) - continue; - - // A global constant counts as local memory for our purposes. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { - if (!GV->isConstant()) - return false; - continue; - } - - // If both select values point to local memory, then so does the select. - if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - Worklist.push_back(SI->getTrueValue()); - Worklist.push_back(SI->getFalseValue()); - continue; - } - - // If all values incoming to a phi node point to local memory, then so does - // the phi. - if (PHINode *PN = dyn_cast<PHINode>(V)) { - // Don't bother inspecting phi nodes with many operands. - if (PN->getNumIncomingValues() > MaxLookup) - return false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - Worklist.push_back(PN->getIncomingValue(i)); - continue; - } - - return false; - } while (!Worklist.empty() && --MaxLookup); - - return Worklist.empty(); -} - /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Function*, 8> SCCNodes; @@ -141,14 +101,15 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // External node - may write memory. Just give up. return false; - if (F->doesNotAccessMemory()) + AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F); + if (MRB == AliasAnalysis::DoesNotAccessMemory) // Already perfect! continue; // Definitions with weak linkage may be overridden at linktime with // something that writes memory, so treat them like declarations. if (F->isDeclaration() || F->mayBeOverridden()) { - if (!F->onlyReadsMemory()) + if (!AliasAnalysis::onlyReadsMemory(MRB)) // May write memory. Just give up. return false; @@ -163,32 +124,62 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. CallSite CS(cast<Value>(I)); - if (CS && CS.getCalledFunction()) { + if (CS) { // Ignore calls to functions in the same SCC. - if (SCCNodes.count(CS.getCalledFunction())) + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) continue; - // Ignore intrinsics that only access local memory. - if (unsigned id = CS.getCalledFunction()->getIntrinsicID()) - if (AliasAnalysis::getIntrinsicModRefBehavior(id) == - AliasAnalysis::AccessesArguments) { - // Check that all pointer arguments point to local memory. + AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS); + // If the call doesn't access arbitrary memory, we may be able to + // figure out something. + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + // If the call does access argument pointees, check each argument. + if (AliasAnalysis::doesAccessArgPointees(MRB)) + // Check whether all pointer arguments point to local memory, and + // ignore calls that only access local memory. for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI) { Value *Arg = *CI; - if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg)) - // Writes memory. Just give up. - return false; + if (Arg->getType()->isPointerTy()) { + AliasAnalysis::Location Loc(Arg, + AliasAnalysis::UnknownSize, + I->getMetadata(LLVMContext::MD_tbaa)); + if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) { + if (MRB & AliasAnalysis::Mod) + // Writes non-local memory. Give up. + return false; + if (MRB & AliasAnalysis::Ref) + // Ok, it reads non-local memory. + ReadsMemory = true; + } + } } - // Only reads and writes local memory. - continue; - } - } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - // Ignore loads from local memory. - if (PointsToLocalMemory(LI->getPointerOperand())) continue; + } + // The call could access any memory. If that includes writes, give up. + if (MRB & AliasAnalysis::Mod) + return false; + // If it reads, note it. + if (MRB & AliasAnalysis::Ref) + ReadsMemory = true; + continue; + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore non-volatile loads from local memory. + if (!LI->isVolatile()) { + AliasAnalysis::Location Loc = AA->getLocation(LI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - // Ignore stores to local memory. - if (PointsToLocalMemory(SI->getPointerOperand())) + // Ignore non-volatile stores to local memory. + if (!SI->isVolatile()) { + AliasAnalysis::Location Loc = AA->getLocation(SI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { + // Ignore vaargs on local memory. + AliasAnalysis::Location Loc = AA->getLocation(VI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } @@ -198,10 +189,6 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // Writes memory. Just give up. return false; - if (isMalloc(I)) - // malloc claims not to write memory! PR3754. - return false; - // If this instruction may read memory, remember that. ReadsMemory |= I->mayReadFromMemory(); } @@ -384,6 +371,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { } bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { + AA = &getAnalysis<AliasAnalysis>(); + bool Changed = AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index aa18601..2b427aa 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -31,7 +31,9 @@ STATISTIC(NumVariables, "Number of global variables removed"); namespace { struct GlobalDCE : public ModulePass { static char ID; // Pass identification, replacement for typeid - GlobalDCE() : ModulePass(ID) {} + GlobalDCE() : ModulePass(ID) { + initializeGlobalDCEPass(*PassRegistry::getPassRegistry()); + } // run - Do the GlobalDCE pass on the specified module, optionally updating // the specified callgraph to reflect the changes. @@ -52,7 +54,7 @@ namespace { char GlobalDCE::ID = 0; INITIALIZE_PASS(GlobalDCE, "globaldce", - "Dead Global Elimination", false, false); + "Dead Global Elimination", false, false) ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index a77af54..d4cb712 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -40,6 +40,7 @@ using namespace llvm; STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); @@ -55,11 +56,14 @@ STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); namespace { + struct GlobalStatus; struct GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - GlobalOpt() : ModulePass(ID) {} + GlobalOpt() : ModulePass(ID) { + initializeGlobalOptPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); @@ -69,13 +73,16 @@ namespace { bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); - bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, + const SmallPtrSet<const PHINode*, 16> &PHIUsers, + const GlobalStatus &GS); }; } char GlobalOpt::ID = 0; INITIALIZE_PASS(GlobalOpt, "globalopt", - "Global Variable Optimizer", false, false); + "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } @@ -85,6 +92,9 @@ namespace { /// about it. If we find out that the address of the global is taken, none of /// this info will be accurate. struct GlobalStatus { + /// isCompared - True if the global's address is used in a comparison. + bool isCompared; + /// isLoaded - True if the global is ever loaded. If the global isn't ever /// loaded it can be deleted. bool isLoaded; @@ -129,10 +139,11 @@ struct GlobalStatus { /// HasPHIUser - Set to true if this global has a user that is a PHI node. bool HasPHIUser; - - GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), - AccessingFunction(0), HasMultipleAccessingFunctions(false), - HasNonInstructionUser(false), HasPHIUser(false) {} + + GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + HasPHIUser(false) {} }; } @@ -165,6 +176,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, const User *U = *UI; if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { GS.HasNonInstructionUser = true; + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. + if (!isa<PointerType>(CE->getType())) return true; + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(U)) { if (!GS.HasMultipleAccessingFunctions) { @@ -221,7 +237,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa<CmpInst>(I)) { - // Nothing to analyse. + GS.isCompared = true; } else if (isa<MemTransferInst>(I)) { const MemTransferInst *MTI = cast<MemTransferInst>(I); if (MTI->getArgOperand(0) == V) @@ -308,7 +324,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit); - } else if (CE->getOpcode() == Instruction::BitCast && + } else if (CE->getOpcode() == Instruction::BitCast && CE->getType()->isPointerTy()) { // Pointer cast, delete any stores and memsets to the global. Changed |= CleanupConstantGlobalUsers(CE, 0); @@ -324,7 +340,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { // and will invalidate our notion of what Init is. Constant *SubInit = 0; if (!isa<ConstantExpr>(GEP->getOperand(0))) { - ConstantExpr *CE = + ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -361,7 +377,7 @@ static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast<Constant>(V)) return SafeToDestroyConstant(C); - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; @@ -371,15 +387,15 @@ static bool isSafeSROAElementUse(Value *V) { // Stores *to* the pointer are ok. if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getOperand(0) != V; - + // Otherwise, it must be a GEP. GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); if (GEPI == 0) return false; - + if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || !cast<Constant>(GEPI->getOperand(1))->isNullValue()) return false; - + for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); I != E; ++I) if (!isSafeSROAElementUse(*I)) @@ -393,11 +409,11 @@ static bool isSafeSROAElementUse(Value *V) { /// static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // The user of the global must be a GEP Inst or a ConstantExpr GEP. - if (!isa<GetElementPtrInst>(U) && - (!isa<ConstantExpr>(U) || + if (!isa<GetElementPtrInst>(U) && + (!isa<ConstantExpr>(U) || cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) return false; - + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we // don't like < 3 operand CE's, and we don't like non-constant integer // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some @@ -409,18 +425,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); ++GEPI; // Skip over the pointer index. - + // If this is a use of an array allocation, do a bit more checking for sanity. if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { uint64_t NumElements = AT->getNumElements(); ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); - + // Check to make sure that index falls within the array. If not, // something funny is going on, so we won't do the optimization. // if (Idx->getZExtValue() >= NumElements) return false; - + // We cannot scalar repl this level of the array unless any array // sub-indices are in-range constants. In particular, consider: // A[0][i]. We cannot know that the user isn't doing invalid things like @@ -441,7 +457,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { "Indexed GEP type is not array, vector, or struct!"); continue; } - + ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); if (!IdxVal || IdxVal->getZExtValue() >= NumElements) return false; @@ -465,7 +481,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { } return true; } - + /// SRAGlobal - Perform scalar replacement of aggregates on the specified global /// variable. This opens the door for other optimizations by exposing the @@ -476,7 +492,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) return 0; - + assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); const Type *Ty = Init->getType(); @@ -488,7 +504,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { unsigned StartAlignment = GV->getAlignment(); if (StartAlignment == 0) StartAlignment = TD.getABITypeAlignment(GV->getType()); - + if (const StructType *STy = dyn_cast<StructType>(Ty)) { NewGlobals.reserve(STy->getNumElements()); const StructLayout &Layout = *TD.getStructLayout(STy); @@ -503,7 +519,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -522,7 +538,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NumElements > 16 && GV->hasNUsesOrMore(16)) return 0; // It's not worth it. NewGlobals.reserve(NumElements); - + uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { @@ -537,7 +553,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -549,7 +565,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -615,7 +631,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { } /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified -/// value will trap if the value is dynamically null. PHIs keeps track of any +/// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSet<const PHINode*, 8> &PHIs) { @@ -757,7 +773,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { // Keep track of whether we are able to remove all the uses of the global // other than the store that defines it. bool AllNonStoreUsesGone = true; - + // Replace all uses of loads with uses of uses of the stored value. for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; @@ -830,7 +846,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, ConstantInt *NElements, TargetData* TD) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - + const Type *GlobalType; if (NElements->getZExtValue() == 1) GlobalType = AllocTy; @@ -840,14 +856,14 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, UndefValue::get(GlobalType), GV->getName()+".body", GV, GV->isThreadLocal()); - + // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update // other users to use the global as well. @@ -867,10 +883,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, User->replaceUsesOfWith(CI, TheBC); } } - + Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, + RepValue = ConstantExpr::getBitCast(RepValue, GV->getType()->getElementType()); // If there is a comparison against null, we will insert a global bool to @@ -890,7 +906,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, SI->eraseFromParent(); continue; } - + LoadInst *LI = cast<LoadInst>(GV->use_back()); while (!LI->use_empty()) { Use &LoadUse = LI->use_begin().getUse(); @@ -898,7 +914,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, LoadUse = RepValue; continue; } - + ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); // Replace the cmp X, 0 with a use of the bool value. Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); @@ -963,20 +979,20 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { continue; // Fine, ignore. } - + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. } - + // Must index into the array and into the struct. if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) return false; continue; } - + if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. @@ -985,13 +1001,13 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, return false; continue; } - + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; } - + return false; } return true; @@ -1000,9 +1016,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV /// somewhere. Transform all uses of the allocation into loads from the /// global and uses of the resultant pointer. Further, delete the store into -/// GV. This assumes that these value pass the +/// GV. This assumes that these value pass the /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. -static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { while (!Alloc->use_empty()) { Instruction *U = cast<Instruction>(*Alloc->use_begin()); @@ -1035,7 +1051,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, continue; } } - + // Insert a load from the global, and use it instead of the malloc. Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); U->replaceUsesOfWith(Alloc, NL); @@ -1053,24 +1069,24 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { const Instruction *User = cast<Instruction>(*UI); - + // Comparison against null is ok. if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) return false; continue; } - + // getelementptr is also ok, but only a simple form. if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { // Must index into the array and into the struct. if (GEPI->getNumOperands() < 3) return false; - + // Otherwise the GEP is ok. continue; } - + if (const PHINode *PN = dyn_cast<PHINode>(User)) { if (!LoadUsingPHIsPerLoad.insert(PN)) // This means some phi nodes are dependent on each other. @@ -1079,19 +1095,19 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, if (!LoadUsingPHIs.insert(PN)) // If we have already analyzed this PHI, then it is safe. continue; - + // Make sure all uses of the PHI are simple enough to transform. if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; - + continue; } - + // Otherwise we don't know what this is, not ok. return false; } - + return true; } @@ -1110,10 +1126,10 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, return false; LoadUsingPHIsPerLoad.clear(); } - + // If we reach here, we know that all uses of the loads and transitive uses // (through PHI nodes) are simple enough to transform. However, we don't know - // that all inputs the to the PHI nodes are in the same equivalence sets. + // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() @@ -1121,29 +1137,29 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); - + // PHI of the stored value itself is ok. if (InVal == StoredVal) continue; - + if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) { // One of the PHIs in our set is (optimistically) ok. if (LoadUsingPHIs.count(InPN)) continue; return false; } - + // Load from GV is ok. if (const LoadInst *LI = dyn_cast<LoadInst>(InVal)) if (LI->getOperand(0) == GV) continue; - + // UNDEF? NULL? - + // Anything else is rejected. return false; } } - + return true; } @@ -1151,15 +1167,15 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; - + if (FieldNo >= FieldVals.size()) FieldVals.resize(FieldNo+1); - + // If we already have this value, just reuse the previously scalarized // version. if (Value *FieldVal = FieldVals[FieldNo]) return FieldVal; - + // Depending on what instruction this is, we have several cases. Value *Result; if (LoadInst *LI = dyn_cast<LoadInst>(V)) { @@ -1172,9 +1188,9 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. - const StructType *ST = + const StructType *ST = cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); - + Result = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), PN->getName()+".f"+Twine(FieldNo), PN); @@ -1183,13 +1199,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, llvm_unreachable("Unknown usable value"); Result = 0; } - + return FieldVals[FieldNo] = Result; } /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from /// the load, rewrite the derived value to use the HeapSRoA'd load. -static void RewriteHeapSROALoadUser(Instruction *LoadUser, +static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { // If this is a comparison against null, handle it. @@ -1199,30 +1215,30 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, InsertedScalarizedValues, PHIsToRewrite); - + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, - Constant::getNullValue(NPtr->getType()), + Constant::getNullValue(NPtr->getType()), SCI->getName()); SCI->replaceAllUsesWith(New); SCI->eraseFromParent(); return; } - + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) && "Unexpected GEPI!"); - + // Load the pointer for this field. unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, InsertedScalarizedValues, PHIsToRewrite); - + // Create the new GEP idx vector. SmallVector<Value*, 8> GEPIdx; GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - + Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx.begin(), GEPIdx.end(), GEPI->getName(), GEPI); @@ -1243,7 +1259,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, tie(InsertPos, Inserted) = InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); if (!Inserted) return; - + // If this is the first time we've seen this PHI, recursively process all // users. for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { @@ -1256,7 +1272,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// is a value loaded from the global. Eliminate all uses of Ptr, making them /// use FieldGlobals instead. All uses of loaded values satisfy /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. -static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); @@ -1264,7 +1280,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, Instruction *User = cast<Instruction>(*UI++); RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } - + if (Load->use_empty()) { Load->eraseFromParent(); InsertedScalarizedValues.erase(Load); @@ -1289,11 +1305,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // new mallocs at the same place as CI, and N globals. std::vector<Value*> FieldGlobals; std::vector<Value*> FieldMallocs; - + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ const Type *FieldTy = STy->getElementType(FieldNo); const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); - + GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, @@ -1301,7 +1317,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, GV->getName() + ".f" + Twine(FieldNo), GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); if (const StructType *ST = dyn_cast<StructType>(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); @@ -1313,7 +1329,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); } - + // The tricky aspect of this transformation is handling the case when malloc // fails. In the original code, malloc failing would set the result pointer // of malloc to null. In this case, some mallocs could succeed and others @@ -1340,23 +1356,23 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Split the basic block at the old malloc. BasicBlock *OrigBB = CI->getParent(); BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); - + // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), "malloc_ret_null", OrigBB->getParent()); - + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond // branch on RunningOr. OrigBB->getTerminator()->eraseFromParent(); BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - + // Within the NullPtrBlock, we need to emit a comparison and branch for each // pointer, because some may be null while others are not. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, Constant::getNullValue(GVVal->getType()), "tmp"); BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", @@ -1371,10 +1387,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], FreeBlock); BranchInst::Create(NextBlock, FreeBlock); - + NullPtrBlock = NextBlock; } - + BranchInst::Create(ContBB, NullPtrBlock); // CI is no longer needed, remove it. @@ -1385,25 +1401,25 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// inserted for a given load. DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; - + std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; - + // Okay, the malloc site is completely handled. All of the uses of GV are now // loads, and all uses of those loads are simple. Rewrite them to use loads // of the per-field globals instead. for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { Instruction *User = cast<Instruction>(*UI++); - + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); continue; } - + // Must be a store of null. StoreInst *SI = cast<StoreInst>(User); assert(isa<ConstantPointerNull>(SI->getOperand(0)) && "Unexpected heap-sra user!"); - + // Insert a store of null into each global. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); @@ -1430,7 +1446,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } - + // Drop all inter-phi links and any loads that made it this far. for (DenseMap<Value*, std::vector<Value*> >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1440,7 +1456,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) LI->dropAllReferences(); } - + // Delete all the phis and loads now that inter-references are dead. for (DenseMap<Value*, std::vector<Value*> >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1450,7 +1466,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) LI->eraseFromParent(); } - + // The old global is now dead, remove it. GV->eraseFromParent(); @@ -1468,7 +1484,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, TargetData *TD) { if (!TD) return false; - + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1508,7 +1524,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); return true; } - + // If the allocation is an array of structures, consider transforming this // into multiple malloc'd arrays, one for each field. This is basically // SRoA for malloc'd memory. @@ -1544,13 +1560,13 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CI = dyn_cast<BitCastInst>(Malloc) ? extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); } - + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); return true; } - + return false; -} +} // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge // that only one value (besides its initializer) is ever stored to the global. @@ -1568,7 +1584,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) - SOVC = + SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. @@ -1576,7 +1592,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { const Type* MallocType = getMallocAllocatedType(CI); - if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, GVI, TD)) return true; } @@ -1591,7 +1607,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// whenever it is used. This exposes the values to other scalar optimizations. static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { const Type *GVElType = GV->getType()->getElementType(); - + // If GVElType is already i1, it is already shrunk. If the type of the GV is // an FP value, pointer or vector, don't do this optimization because a select // between them is very expensive and unlikely to lead to later @@ -1611,11 +1627,11 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); - + // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, - GlobalValue::InternalLinkage, + GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", GV->isThreadLocal()); @@ -1684,10 +1700,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { /// ProcessInternalGlobal - Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, - Module::global_iterator &GVI) { - SmallPtrSet<const PHINode*, 16> PHIUsers; - GlobalStatus GS; +bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, + Module::global_iterator &GVI) { + if (!GV->hasLocalLinkage()) + return false; + + // Do more involved optimizations if the global is internal. GV->removeDeadConstantUsers(); if (GV->use_empty()) { @@ -1697,140 +1715,139 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } - if (!AnalyzeGlobal(GV, GS, PHIUsers)) { -#if 0 - DEBUG(dbgs() << "Global: " << *GV); - DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); - DEBUG(dbgs() << " StoredType = "); - switch (GS.StoredType) { - case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; - case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); - break; - case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; - case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; - } - if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); - if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(dbgs() << " AccessingFunction = " - << GS.AccessingFunction->getName() << "\n"); - DEBUG(dbgs() << " HasMultipleAccessingFunctions = " - << GS.HasMultipleAccessingFunctions << "\n"); - DEBUG(dbgs() << " HasNonInstructionUser = " - << GS.HasNonInstructionUser<<"\n"); - DEBUG(dbgs() << "\n"); -#endif - - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive we can make - // this global a local variable) we replace the global with a local alloca - // in this function. - // - // NOTE: It doesn't make sense to promote non single-value types since we - // are just replacing static memory to stack memory. - // - // If the global is in different address space, don't bring it to stack. - if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && - GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); - Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction - ->getEntryBlock().begin()); - const Type* ElemTy = GV->getType()->getElementType(); - // FIXME: Pass Global's alignment when globals have alignment - AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); - if (!isa<UndefValue>(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, &FirstI); - - GV->replaceAllUsesWith(Alloca); + SmallPtrSet<const PHINode*, 16> PHIUsers; + GlobalStatus GS; + + if (AnalyzeGlobal(GV, GS, PHIUsers)) + return false; + + if (!GS.isCompared && !GV->hasUnnamedAddr()) { + GV->setUnnamedAddr(true); + NumUnnamed++; + } + + if (GV->isConstant() || !GV->hasInitializer()) + return false; + + return ProcessInternalGlobal(GV, GVI, PHIUsers, GS); +} + +/// ProcessInternalGlobal - Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. +bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, + Module::global_iterator &GVI, + const SmallPtrSet<const PHINode*, 16> &PHIUsers, + const GlobalStatus &GS) { + // If this is a first class global and has only one accessing function + // and this function is main (which we know is not recursive we can make + // this global a local variable) we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non single-value types since we + // are just replacing static memory to stack memory. + // + // If the global is in different address space, don't bring it to stack. + if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && !GS.HasNonInstructionUser && + GV->getType()->getElementType()->isSingleValueType() && + GS.AccessingFunction->getName() == "main" && + GS.AccessingFunction->hasExternalLinkage() && + GV->getType()->getAddressSpace() == 0) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction + ->getEntryBlock().begin()); + const Type* ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + if (!isa<UndefValue>(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.isLoaded) { + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, delete it. + if (GV->use_empty()) { GV->eraseFromParent(); - ++NumLocalized; - return true; + ++NumDeleted; + Changed = true; } - - // If the global is never loaded (but may be stored to), it is dead. - // Delete it now. - if (!GS.isLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); - - // Delete any stores we can find to the global. We may not be able to - // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - // If the global is dead now, delete it. - if (GV->use_empty()) { - GV->eraseFromParent(); - ++NumDeleted; - Changed = true; - } - return Changed; + return Changed; - } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); - GV->setConstant(true); + } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); + GV->setConstant(true); - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); - // If the global is dead now, just nuke it. - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Marking constant allowed us to simplify " - << "all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; + // If the global is dead now, just nuke it. + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; + } + } else if (GS.StoredType == GlobalStatus::isStoredOnce) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be the stored value, then delete all stores to the + // global. This allows us to mark it constant. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (isa<UndefValue>(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } else { + GVI = GV; + } + ++NumSubstitute; + return true; } - ++NumMarked; + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. + if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, + getAnalysisIfAvailable<TargetData>())) return true; - } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } - } else if (GS.StoredType == GlobalStatus::isStoredOnce) { - // If the initial value for the global was an undef value, and if only - // one other value was stored into it, we can just change the - // initializer to be the stored value, then delete all stores to the - // global. This allows us to mark it constant. - if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) - if (isa<UndefValue>(GV->getInitializer())) { - // Change the initial value here. - GV->setInitializer(SOVConstant); - - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; - } else { - GVI = GV; - } - ++NumSubstitute; - return true; - } - // Try to optimize globals based on the knowledge that only one value - // (besides its initializer) is ever stored to the global. - if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysisIfAvailable<TargetData>())) + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; return true; - - // Otherwise, if the global was not a boolean, we can shrink it to be a - // boolean. - if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { - ++NumShrunkToBool; - return true; - } - } + } } + return false; } @@ -1917,10 +1934,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { if (New && New != CE) GV->setInitializer(New); } - // Do more involved optimizations if the global is internal. - if (!GV->isConstant() && GV->hasLocalLinkage() && - GV->hasInitializer()) - Changed |= ProcessInternalGlobal(GV, GVI); + + Changed |= ProcessGlobal(GV, GVI); } return Changed; } @@ -1928,46 +1943,47 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { /// FindGlobalCtors - Find the llvm.globalctors list, verifying that all /// initializers have an init priority of 65535. GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (I->getName() == "llvm.global_ctors") { - // Found it, verify it's an array of { int, void()* }. - const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); - if (!ATy) return 0; - const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); - if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isIntegerTy(32)) return 0; - const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); - if (!PFTy) return 0; - const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); - if (!FTy || !FTy->getReturnType()->isVoidTy() || - FTy->isVarArg() || FTy->getNumParams() != 0) - return 0; - - // Verify that the initializer is simple enough for us to handle. - if (!I->hasDefinitiveInitializer()) return 0; - ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); - if (!CA) return 0; - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) { - if (isa<ConstantPointerNull>(CS->getOperand(1))) - continue; + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (GV == 0) return 0; + + // Found it, verify it's an array of { int, void()* }. + const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType()); + if (!ATy) return 0; + const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); + if (!STy || STy->getNumElements() != 2 || + !STy->getElementType(0)->isIntegerTy(32)) return 0; + const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); + if (!PFTy) return 0; + const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); + if (!FTy || !FTy->getReturnType()->isVoidTy() || + FTy->isVarArg() || FTy->getNumParams() != 0) + return 0; - // Must have a function or null ptr. - if (!isa<Function>(CS->getOperand(1))) - return 0; - - // Init priority must be standard. - ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); - if (!CI || CI->getZExtValue() != 65535) - return 0; - } else { - return 0; - } - - return I; - } - return 0; + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) return 0; + + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!CA) return 0; + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + ConstantStruct *CS = dyn_cast<ConstantStruct>(*i); + if (CS == 0) return 0; + + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return 0; + + // Init priority must be standard. + ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!CI || CI->getZExtValue() != 65535) + return 0; + } + + return GV; } /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, @@ -1985,13 +2001,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. -static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, +static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const std::vector<Function*> &Ctors) { // If we made a change, reassemble the initializer list. std::vector<Constant*> CSVals; CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); CSVals.push_back(0); - + // Create the new init list. std::vector<Constant*> CAList; for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { @@ -2007,26 +2023,26 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); } - + // Create the array initializer. const Type *StructTy = cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); - + // If we didn't change the number of elements, don't create a new GV. if (CA->getType() == GCL->getInitializer()->getType()) { GCL->setInitializer(CA); return GCL; } - + // Create the new global and insert it next to the existing list. GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); - + // Nuke the old list, replacing any uses with the new one. if (!GCL->use_empty()) { Constant *V = NGV; @@ -2035,7 +2051,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, GCL->replaceAllUsesWith(V); } GCL->eraseFromParent(); - + if (Ctors.size()) return NGV; else @@ -2043,17 +2059,86 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } -static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, - Value *V) { +static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) { if (Constant *CV = dyn_cast<Constant>(V)) return CV; Constant *R = ComputedValues[V]; assert(R && "Reference to an uncomputed value!"); return R; } +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants); + + +/// isSimpleEnoughValueToCommit - Return true if the specified constant can be +/// handled by the code generator. We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants) { + // Simple integer, undef, constant aggregate zero, global addresses, etc are + // all supported. + if (C->getNumOperands() == 0 || isa<BlockAddress>(C) || + isa<GlobalValue>(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || + isa<ConstantVector>(C)) { + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + Constant *Op = cast<Constant>(C->getOperand(i)); + if (!isSimpleEnoughValueToCommit(Op, SimpleConstants)) + return false; + } + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast<ConstantExpr>(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // These casts are always fine if the casted value is. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + + // GEP is fine if it is simple + constant offset. + case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + + case Instruction::Add: + // We allow simple+cst. + if (!isa<ConstantInt>(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C)) return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants); +} + + /// isSimpleEnoughPointerToCommit - Return true if this constant is simple -/// enough for us to understand. In particular, if it is a cast of something, -/// we punt. We basically just support direct accesses to globals and GEP's of +/// enough for us to understand. In particular, if it is a cast to anything +/// other than from one pointer type to another pointer type, we punt. +/// We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't @@ -2062,19 +2147,19 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - return GV->hasDefinitiveInitializer(); + return GV->hasUniqueInitializer(); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { // Handle a constantexpr gep. if (CE->getOpcode() == Instruction::GetElementPtr && isa<GlobalVariable>(CE->getOperand(0)) && cast<GEPOperator>(CE)->isInBounds()) { GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - if (!GV->hasDefinitiveInitializer()) + if (!GV->hasUniqueInitializer()) return false; // The first index must be zero. @@ -2087,7 +2172,18 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa<GlobalVariable>(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); } + } + return false; } @@ -2101,7 +2197,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, assert(Val->getType() == Init->getType() && "Type mismatch!"); return Val; } - + std::vector<Constant*> Elts; if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { @@ -2119,13 +2215,13 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, llvm_unreachable("This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); } - + // Replace the element that we are supposed to. ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); - + // Return the modified struct. return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), STy->isPacked()); @@ -2138,8 +2234,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, NumElts = ATy->getNumElements(); else NumElts = cast<VectorType>(InitTy)->getNumElements(); - - + + // Break up the array into elements. if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) @@ -2154,16 +2250,15 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, " ConstantFoldLoadThroughGEPConstantExpr"); Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); } - + assert(CI->getZExtValue() < NumElts); Elts[CI->getZExtValue()] = EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - + if (Init->getType()->isArrayTy()) return ConstantArray::get(cast<ArrayType>(InitTy), Elts); - else - return ConstantVector::get(&Elts[0], Elts.size()); - } + return ConstantVector::get(Elts); + } } /// CommitValueTo - We have decided that Addr (which satisfies the predicate @@ -2189,14 +2284,14 @@ static Constant *ComputeLoadResult(Constant *P, // is the most up-to-date. DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); if (I != Memory.end()) return I->second; - + // Access it. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); return 0; } - + // Handle a constantexpr getelementptr. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) if (CE->getOpcode() == Instruction::GetElementPtr && @@ -2216,17 +2311,19 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl<Constant*> &ActualArgs, std::vector<Function*> &CallStack, DenseMap<Constant*, Constant*> &MutatedMemory, - std::vector<GlobalVariable*> &AllocaTmps) { + std::vector<GlobalVariable*> &AllocaTmps, + SmallPtrSet<Constant*, 8> &SimpleConstants, + const TargetData *TD) { // Check to see if this function is already executing (recursion). If so, // bail out. TODO: we might want to accept limited recursion. if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) return false; - + CallStack.push_back(F); - + /// Values - As we compute SSA register values, we store their contents here. DenseMap<Value*, Constant*> Values; - + // Initialize arguments to the incoming values specified. unsigned ArgNo = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; @@ -2237,21 +2334,65 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, /// we can only evaluate any one basic block at most once. This set keeps /// track of what we have executed so we can detect recursive cases etc. SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; - + // CurInst - The current instruction we're evaluating. BasicBlock::iterator CurInst = F->begin()->begin(); - + // This is the main evaluation loop. while (1) { Constant *InstResult = 0; - + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { if (SI->isVolatile()) return false; // no volatile accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. return false; + Constant *Val = getVal(Values, SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants)) + return false; + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + if (CE->getOpcode() == Instruction::BitCast) { + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. + Ptr = CE->getOperand(0); + + const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. + while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (const StructType *STy = dyn_cast<StructType>(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2); + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. + } else { + return 0; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + } + MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), @@ -2290,7 +2431,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); - InstResult = AllocaTmps.back(); + InstResult = AllocaTmps.back(); } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { // Debug info can safely be ignored here. @@ -2324,11 +2465,11 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else { if (Callee->getFunctionType()->isVarArg()) return false; - + Constant *RetVal; // Execute the call, if successful, use the return value. if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, - MutatedMemory, AllocaTmps)) + MutatedMemory, AllocaTmps, SimpleConstants, TD)) return false; InstResult = RetVal; } @@ -2342,7 +2483,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); if (!Cond) return false; // Cannot determine. - NewBB = BI->getSuccessor(!Cond->getZExtValue()); + NewBB = BI->getSuccessor(!Cond->getZExtValue()); } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { ConstantInt *Val = @@ -2358,20 +2499,20 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { if (RI->getNumOperands()) RetVal = getVal(Values, RI->getOperand(0)); - + CallStack.pop_back(); // return from fn. return true; // We succeeded at evaluating this ctor! } else { // invoke, unwind, unreachable. return false; // Cannot handle this terminator. } - + // Okay, we succeeded in evaluating this control flow. See if we have // executed the new block before. If so, we have a looping function, // which we cannot evaluate in reasonable time. if (!ExecutedBlocks.insert(NewBB)) return false; // looped! - + // Okay, we have never been in this block before. Check to see if there // are any PHI nodes. If so, evaluate them with information about where // we came from. @@ -2387,10 +2528,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, // Did not know how to evaluate this! return false; } - - if (!CurInst->use_empty()) + + if (!CurInst->use_empty()) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) + InstResult = ConstantFoldConstantExpression(CE, TD); + Values[CurInst] = InstResult; - + } + // Advance program counter. ++CurInst; } @@ -2398,7 +2543,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F) { +static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { /// MutatedMemory - For each store we execute, we update this map. Loads /// check this to get the most up-to-date value. If evaluation is successful, /// this state is committed to the process. @@ -2408,17 +2553,23 @@ static bool EvaluateStaticConstructor(Function *F) { /// to represent its body. This vector is needed so we can delete the /// temporary globals when we are done. std::vector<GlobalVariable*> AllocaTmps; - + /// CallStack - This is used to detect recursion. In pathological situations /// we could hit exponential behavior, but at least there is nothing /// unbounded. std::vector<Function*> CallStack; + /// SimpleConstants - These are constants we have checked and know to be + /// simple enough to live in a static initializer of a global. + SmallPtrSet<Constant*, 8> SimpleConstants; + // Call the function. Constant *RetValDummy; bool EvalSuccess = EvaluateFunction(F, RetValDummy, SmallVector<Constant*, 0>(), CallStack, - MutatedMemory, AllocaTmps); + MutatedMemory, AllocaTmps, + SimpleConstants, TD); + if (EvalSuccess) { // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" @@ -2428,13 +2579,13 @@ static bool EvaluateStaticConstructor(Function *F) { E = MutatedMemory.end(); I != E; ++I) CommitValueTo(I->second, I->first); } - + // At this point, we are done interpreting. If we created any 'alloca' // temporaries, release them now. while (!AllocaTmps.empty()) { GlobalVariable *Tmp = AllocaTmps.back(); AllocaTmps.pop_back(); - + // If there are still users of the alloca, the program is doing something // silly, e.g. storing the address of the alloca somewhere and using it // later. Since this is undefined, we'll just make it be null. @@ -2442,7 +2593,7 @@ static bool EvaluateStaticConstructor(Function *F) { Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); delete Tmp; } - + return EvalSuccess; } @@ -2454,7 +2605,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { std::vector<Function*> Ctors = ParseGlobalCtors(GCL); bool MadeChange = false; if (Ctors.empty()) return false; - + + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); // Loop over global ctors, optimizing them when we can. for (unsigned i = 0; i != Ctors.size(); ++i) { Function *F = Ctors[i]; @@ -2467,12 +2619,12 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { } break; } - + // We cannot simplify external ctor functions. if (F->empty()) continue; - + // If we can evaluate the ctor at compile time, do. - if (EvaluateStaticConstructor(F)) { + if (EvaluateStaticConstructor(F, TD)) { Ctors.erase(Ctors.begin()+i); MadeChange = true; --i; @@ -2480,9 +2632,9 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { continue; } } - + if (!MadeChange) return false; - + GCL = InstallGlobalCtors(GCL, Ctors); return true; } @@ -2546,21 +2698,21 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - + // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); bool LocalChange = true; while (LocalChange) { LocalChange = false; - + // Delete functions that are trivially dead, ccc -> fastcc LocalChange |= OptimizeFunctions(M); - + // Optimize global_ctors list. if (GlobalCtors) LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); - + // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M); @@ -2568,9 +2720,9 @@ bool GlobalOpt::runOnModule(Module &M) { LocalChange |= OptimizeGlobalAliases(M); Changed |= LocalChange; } - + // TODO: Move all global ctors functions to the end of the module for code // layout. - + return Changed; } diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index 1b3cf78..c7c2939 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -35,7 +35,9 @@ namespace { /// struct IPCP : public ModulePass { static char ID; // Pass identification, replacement for typeid - IPCP() : ModulePass(ID) {} + IPCP() : ModulePass(ID) { + initializeIPCPPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); private: @@ -46,7 +48,7 @@ namespace { char IPCP::ID = 0; INITIALIZE_PASS(IPCP, "ipconstprop", - "Interprocedural constant propagation", false, false); + "Interprocedural constant propagation", false, false) ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 340b70e..fbe90ce 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -7,17 +7,51 @@ // //===----------------------------------------------------------------------===// // -// This file implements the C bindings for libLLVMIPO.a, which implements -// several transformations over the LLVM intermediate representation. +// This file implements the common infrastructure (including C bindings) for +// libLLVMIPO.a, which implements several transformations over the LLVM +// intermediate representation. // //===----------------------------------------------------------------------===// #include "llvm-c/Transforms/IPO.h" +#include "llvm/InitializePasses.h" #include "llvm/PassManager.h" #include "llvm/Transforms/IPO.h" using namespace llvm; +void llvm::initializeIPO(PassRegistry &Registry) { + initializeArgPromotionPass(Registry); + initializeConstantMergePass(Registry); + initializeDAEPass(Registry); + initializeDAHPass(Registry); + initializeDTEPass(Registry); + initializeFunctionAttrsPass(Registry); + initializeGlobalDCEPass(Registry); + initializeGlobalOptPass(Registry); + initializeIPCPPass(Registry); + initializeAlwaysInlinerPass(Registry); + initializeSimpleInlinerPass(Registry); + initializeInternalizePassPass(Registry); + initializeLoopExtractorPass(Registry); + initializeBlockExtractorPassPass(Registry); + initializeSingleLoopExtractorPass(Registry); + initializeLowerSetJmpPass(Registry); + initializeMergeFunctionsPass(Registry); + initializePartialInlinerPass(Registry); + initializePruneEHPass(Registry); + initializeStripDeadPrototypesPassPass(Registry); + initializeStripSymbolsPass(Registry); + initializeStripDebugDeclarePass(Registry); + initializeStripDeadDebugInfoPass(Registry); + initializeStripNonDebugSymbolsPass(Registry); + initializeSRETPromotionPass(Registry); +} + +void LLVMInitializeIPO(LLVMPassRegistryRef R) { + initializeIPO(*unwrap(R)); +} + void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createArgumentPromotionPass()); } diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index ecc60ad..ce795b7 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -36,7 +36,9 @@ namespace { InlineCostAnalyzer CA; public: // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000) {} + AlwaysInliner() : Inliner(ID, -2000000000) { + initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); + } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); @@ -61,8 +63,11 @@ namespace { } char AlwaysInliner::ID = 0; -INITIALIZE_PASS(AlwaysInliner, "always-inline", - "Inliner for always_inline functions", false, false); +INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", + "Inliner for always_inline functions", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(AlwaysInliner, "always-inline", + "Inliner for always_inline functions", false, false) Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 9c6637d..0c5b3be 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -33,8 +33,12 @@ namespace { SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: - SimpleInliner() : Inliner(ID) {} - SimpleInliner(int Threshold) : Inliner(ID, Threshold) {} + SimpleInliner() : Inliner(ID) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + SimpleInliner(int Threshold) : Inliner(ID, Threshold) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); @@ -56,8 +60,11 @@ namespace { } char SimpleInliner::ID = 0; -INITIALIZE_PASS(SimpleInliner, "inline", - "Function Integration/Inlining", false, false); +INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 4983e8e..37eafd7 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -52,7 +52,8 @@ Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} Inliner::Inliner(char &ID, int Threshold) - : CallGraphSCCPass(ID), InlineThreshold(Threshold) {} + : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? + InlineLimit : Threshold) {} /// getAnalysisUsage - For this class, we declare that we require and preserve /// the call graph. If the derived class implements this method, it should @@ -74,7 +75,8 @@ InlinedArrayAllocasTy; /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas) { + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -91,7 +93,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, !Caller->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtect); - // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, // then we know that they have disjoint lifetimes and that we can merge them. @@ -115,6 +116,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // SmallPtrSet<AllocaInst*, 16> UsedAllocas; + // When processing our SCC, check to see if CS was inlined from some other + // call site. For example, if we're processing "A" in this code: + // A() { B() } + // B() { x = alloca ... C() } + // C() { y = alloca ... } + // Assume that C was not inlined into B initially, and so we're processing A + // and decide to inline B into A. Doing this makes an alloca available for + // reuse and makes a callsite (C) available for inlining. When we process + // the C call site we don't want to do any alloca merging between X and Y + // because their scopes are not disjoint. We could make this smarter by + // keeping track of the inline history for each alloca in the + // InlinedArrayAllocas but this isn't likely to be a significant win. + if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. + return true; + // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); @@ -152,19 +168,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare // success! - DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI); + DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " + << *AvailableAlloca << '\n'); AI->replaceAllUsesWith(AvailableAlloca); AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; + IFI.StaticAllocas[AllocaNo] = 0; break; } // If we already nuked the alloca, we're done with it. if (MergedAwayAlloca) continue; - + // If we were unable to merge away the alloca either because there are no // allocas of the right type available or because we reused them all // already, remember that this alloca came from an inlined function and mark @@ -234,20 +252,25 @@ bool Inliner::shouldInline(CallSite CS) { if (Caller->hasLocalLinkage()) { int TotalSecondaryCost = 0; bool outerCallsFound = false; - bool allOuterCallsWillBeInlined = true; - bool someOuterCallWouldNotBeInlined = false; + // This bool tracks what happens if we do NOT inline C into B. + bool callerWillBeRemoved = true; + // This bool tracks what happens if we DO inline C into B. + bool inliningPreventsSomeOuterInline = false; for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); I != E; ++I) { CallSite CS2(*I); // If this isn't a call to Caller (it could be some other sort - // of reference) skip it. - if (!CS2 || CS2.getCalledFunction() != Caller) + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2.getCalledFunction() != Caller) { + callerWillBeRemoved = false; continue; + } InlineCost IC2 = getInlineCost(CS2); if (IC2.isNever()) - allOuterCallsWillBeInlined = false; + callerWillBeRemoved = false; if (IC2.isAlways() || IC2.isNever()) continue; @@ -257,14 +280,14 @@ bool Inliner::shouldInline(CallSite CS) { float FudgeFactor2 = getInlineFudgeFactor(CS2); if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) - allOuterCallsWillBeInlined = false; + callerWillBeRemoved = false; // See if we have this case. We subtract off the penalty // for the call instruction, which we would be deleting. if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= (int)(CurrentThreshold2 * FudgeFactor2)) { - someOuterCallWouldNotBeInlined = true; + inliningPreventsSomeOuterInline = true; TotalSecondaryCost += Cost2; } } @@ -272,10 +295,10 @@ bool Inliner::shouldInline(CallSite CS) { // one is set very low by getInlineCost, in anticipation that Caller will // be removed entirely. We did not account for this above unless there // is only one caller of Caller. - if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end()) + if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; - if (outerCallsFound && someOuterCallWouldNotBeInlined && + if (outerCallsFound && inliningPreventsSomeOuterInline && TotalSecondaryCost < Cost) { DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << Cost << @@ -401,7 +424,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee - // itself. If so, we'd be recursively inlinling the same function, + // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. int InlineHistoryID = CallSites[CSi].second; @@ -416,7 +439,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { continue; // Attempt to inline the function. - if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas)) + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, + InlineHistoryID)) continue; ++NumInlined; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index a1d919f..9b9ebad 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -64,10 +64,11 @@ namespace { char InternalizePass::ID = 0; INITIALIZE_PASS(InternalizePass, "internalize", - "Internalize Global Symbols", false, false); + "Internalize Global Symbols", false, false) InternalizePass::InternalizePass(bool AllButMain) : ModulePass(ID), AllButMain(AllButMain){ + initializeInternalizePassPass(*PassRegistry::getPassRegistry()); if (!APIFile.empty()) // If a filename is specified, use it. LoadFile(APIFile.c_str()); if (!APIList.empty()) // If a list is specified, use it as well. @@ -76,6 +77,7 @@ InternalizePass::InternalizePass(bool AllButMain) InternalizePass::InternalizePass(const std::vector<const char *>&exportList) : ModulePass(ID), AllButMain(false){ + initializeInternalizePassPass(*PassRegistry::getPassRegistry()); for(std::vector<const char *>::const_iterator itr = exportList.begin(); itr != exportList.end(); itr++) { ExternalNames.insert(*itr); diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index f88dff6..848944d 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -37,7 +37,9 @@ namespace { unsigned NumLoops; explicit LoopExtractor(unsigned numLoops = ~0) - : LoopPass(ID), NumLoops(numLoops) {} + : LoopPass(ID), NumLoops(numLoops) { + initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -50,8 +52,13 @@ namespace { } char LoopExtractor::ID = 0; -INITIALIZE_PASS(LoopExtractor, "loop-extract", - "Extract loops into new functions", false, false); +INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) namespace { /// SingleLoopExtractor - For bugpoint. @@ -63,7 +70,7 @@ namespace { char SingleLoopExtractor::ID = 0; INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", - "Extract at most one loop into a new function", false, false); + "Extract at most one loop into a new function", false, false) // createLoopExtractorPass - This pass extracts all natural loops from the // program into a function if it can. @@ -159,7 +166,7 @@ namespace { char BlockExtractorPass::ID = 0; INITIALIZE_PASS(BlockExtractorPass, "extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)", - false, false); + false, false) // createBlockExtractorPass - This pass extracts all blocks (except those // specified in the argument list) from the functions in the module. diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index 6c715de..b545f0b 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -109,7 +109,9 @@ namespace { bool IsTransformableFunction(StringRef Name); public: static char ID; // Pass identification, replacement for typeid - LowerSetJmp() : ModulePass(ID) {} + LowerSetJmp() : ModulePass(ID) { + initializeLowerSetJmpPass(*PassRegistry::getPassRegistry()); + } void visitCallInst(CallInst& CI); void visitInvokeInst(InvokeInst& II); @@ -122,7 +124,7 @@ namespace { } // end anonymous namespace char LowerSetJmp::ID = 0; -INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false); +INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false) // run - Run the transformation on the program. We grab the function // prototypes for longjmp and setjmp. If they are used in the program, diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 5d838f9..cccffca 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -67,42 +67,87 @@ using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); +STATISTIC(NumThunksWritten, "Number of thunks generated"); +STATISTIC(NumAliasesWritten, "Number of aliases generated"); +STATISTIC(NumDoubleWeak, "Number of new functions created"); + +/// Creates a hash-code for the function which is the same for any two +/// functions that will compare equal, without looking at the instructions +/// inside the function. +static unsigned profileFunction(const Function *F) { + const FunctionType *FTy = F->getFunctionType(); -namespace { - /// MergeFunctions finds functions which will generate identical machine code, - /// by considering all pointer types to be equivalent. Once identified, - /// MergeFunctions will fold them by replacing a call to one to a call to a - /// bitcast of the other. - /// - class MergeFunctions : public ModulePass { - public: - static char ID; - MergeFunctions() : ModulePass(ID) {} - - bool runOnModule(Module &M); - - private: - /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G - /// may be deleted, or may be converted into a thunk. In either case, it - /// should never be visited again. - void MergeTwoFunctions(Function *F, Function *G) const; - - /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also - /// replace direct uses of G with bitcast(F). - void WriteThunk(Function *F, Function *G) const; - - TargetData *TD; - }; + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + ID.AddBoolean(F->hasGC()); + ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(FTy->getReturnType()->getTypeID()); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(FTy->getParamType(i)->getTypeID()); + return ID.ComputeHash(); } -char MergeFunctions::ID = 0; -INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false); +namespace { + +/// ComparableFunction - A struct that pairs together functions with a +/// TargetData so that we can keep them together as elements in the DenseSet. +class ComparableFunction { +public: + static const ComparableFunction EmptyKey; + static const ComparableFunction TombstoneKey; + static TargetData * const LookupOnly; + + ComparableFunction(Function *Func, TargetData *TD) + : Func(Func), Hash(profileFunction(Func)), TD(TD) {} + + Function *getFunc() const { return Func; } + unsigned getHash() const { return Hash; } + TargetData *getTD() const { return TD; } + + // Drops AssertingVH reference to the function. Outside of debug mode, this + // does nothing. + void release() { + assert(Func && + "Attempted to release function twice, or release empty/tombstone!"); + Func = NULL; + } + +private: + explicit ComparableFunction(unsigned Hash) + : Func(NULL), Hash(Hash), TD(NULL) {} + + AssertingVH<Function> Func; + unsigned Hash; + TargetData *TD; +}; + +const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); +const ComparableFunction ComparableFunction::TombstoneKey = + ComparableFunction(1); +TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1); -ModulePass *llvm::createMergeFunctionsPass() { - return new MergeFunctions(); +} + +namespace llvm { + template <> + struct DenseMapInfo<ComparableFunction> { + static ComparableFunction getEmptyKey() { + return ComparableFunction::EmptyKey; + } + static ComparableFunction getTombstoneKey() { + return ComparableFunction::TombstoneKey; + } + static unsigned getHashValue(const ComparableFunction &CF) { + return CF.getHash(); + } + static bool isEqual(const ComparableFunction &LHS, + const ComparableFunction &RHS); + }; } namespace { + /// FunctionComparator - Compares two functions to determine whether or not /// they will generate machine code with the same behaviour. TargetData is /// used if available. The comparator always fails conservatively (erring on the @@ -111,34 +156,34 @@ class FunctionComparator { public: FunctionComparator(const TargetData *TD, const Function *F1, const Function *F2) - : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {} + : F1(F1), F2(F2), TD(TD) {} - /// Compare - test whether the two functions have equivalent behaviour. - bool Compare(); + /// Test whether the two functions have equivalent behaviour. + bool compare(); private: - /// Compare - test whether two basic blocks have equivalent behaviour. - bool Compare(const BasicBlock *BB1, const BasicBlock *BB2); + /// Test whether two basic blocks have equivalent behaviour. + bool compare(const BasicBlock *BB1, const BasicBlock *BB2); - /// Enumerate - Assign or look up previously assigned numbers for the two - /// values, and return whether the numbers are equal. Numbers are assigned in - /// the order visited. - bool Enumerate(const Value *V1, const Value *V2); + /// Assign or look up previously assigned numbers for the two values, and + /// return whether the numbers are equal. Numbers are assigned in the order + /// visited. + bool enumerate(const Value *V1, const Value *V2); - /// isEquivalentOperation - Compare two Instructions for equivalence, similar - /// to Instruction::isSameOperationAs but with modifications to the type + /// Compare two Instructions for equivalence, similar to + /// Instruction::isSameOperationAs but with modifications to the type /// comparison. bool isEquivalentOperation(const Instruction *I1, const Instruction *I2) const; - /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic. + /// Compare two GEPs for equivalent pointer arithmetic. bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); bool isEquivalentGEP(const GetElementPtrInst *GEP1, const GetElementPtrInst *GEP2) { return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2)); } - /// isEquivalentType - Compare two Types, treating all pointer types as equal. + /// Compare two Types, treating all pointer types as equal. bool isEquivalentType(const Type *Ty1, const Type *Ty2) const; // The two functions undergoing comparison. @@ -146,20 +191,26 @@ private: const TargetData *TD; - typedef DenseMap<const Value *, unsigned long> IDMap; - IDMap Map1, Map2; - unsigned long IDMap1Count, IDMap2Count; + DenseMap<const Value *, const Value *> id_map; + DenseSet<const Value *> seen_values; }; + } -/// isEquivalentType - any two pointers in the same address space are -/// equivalent. Otherwise, standard type equivalence rules apply. +// Any two pointers in the same address space are equivalent, intptr_t and +// pointers are equivalent. Otherwise, standard type equivalence rules apply. bool FunctionComparator::isEquivalentType(const Type *Ty1, const Type *Ty2) const { if (Ty1 == Ty2) return true; - if (Ty1->getTypeID() != Ty2->getTypeID()) + if (Ty1->getTypeID() != Ty2->getTypeID()) { + if (TD) { + LLVMContext &Ctx = Ty1->getContext(); + if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true; + if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true; + } return false; + } switch(Ty1->getTypeID()) { default: @@ -167,6 +218,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, // Fall through in Release mode. case Type::IntegerTyID: case Type::OpaqueTyID: + case Type::VectorTyID: // Ty1 == Ty2 would have returned true earlier. return false; @@ -225,21 +277,18 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, return ATy1->getNumElements() == ATy2->getNumElements() && isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); } - - case Type::VectorTyID: { - const VectorType *VTy1 = cast<VectorType>(Ty1); - const VectorType *VTy2 = cast<VectorType>(Ty2); - return VTy1->getNumElements() == VTy2->getNumElements() && - isEquivalentType(VTy1->getElementType(), VTy2->getElementType()); - } } } -/// isEquivalentOperation - determine whether the two operations are the same -/// except that pointer-to-A and pointer-to-B are equivalent. This should be -/// kept in sync with Instruction::isSameOperationAs. +// Determine whether the two operations are the same except that pointer-to-A +// and pointer-to-B are equivalent. This should be kept in sync with +// Instruction::isSameOperationAs. bool FunctionComparator::isEquivalentOperation(const Instruction *I1, const Instruction *I2) const { + // Differences from Instruction::isSameOperationAs: + // * replace type comparison with calls to isEquivalentType. + // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top + // * because of the above, we don't test for the tail bit on calls later on if (I1->getOpcode() != I2->getOpcode() || I1->getNumOperands() != I2->getNumOperands() || !isEquivalentType(I1->getType(), I2->getType()) || @@ -263,14 +312,11 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(I1)) - return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() && - CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && - CI->getAttributes().getRawPointer() == - cast<CallInst>(I2)->getAttributes().getRawPointer(); + return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && + CI->getAttributes() == cast<CallInst>(I2)->getAttributes(); if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && - CI->getAttributes().getRawPointer() == - cast<InvokeInst>(I2)->getAttributes().getRawPointer(); + CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes(); if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) return false; @@ -291,8 +337,7 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, return true; } -/// isEquivalentGEP - determine whether two GEP operations perform the same -/// underlying arithmetic. +// Determine whether two GEP operations perform the same underlying arithmetic. bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) { // When we have target data, we can reduce the GEP down to the value in bytes @@ -315,17 +360,17 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, return false; for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { - if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) + if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) return false; } return true; } -/// Enumerate - Compare two values used by the two functions under pair-wise -/// comparison. If this is the first time the values are seen, they're added to -/// the mapping so that we will detect mismatches on next use. -bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) { +// Compare two values used by the two functions under pair-wise comparison. If +// this is the first time the values are seen, they're added to the mapping so +// that we will detect mismatches on next use. +bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { // Check for function @f1 referring to itself and function @f2 referring to // itself, or referring to each other, or both referring to either of them. // They're all equivalent if the two functions are otherwise equivalent. @@ -334,35 +379,44 @@ bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) { if (V1 == F2 && V2 == F1) return true; - // TODO: constant expressions with GEP or references to F1 or F2. - if (isa<Constant>(V1)) - return V1 == V2; - - if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) { - const InlineAsm *IA1 = cast<InlineAsm>(V1); - const InlineAsm *IA2 = cast<InlineAsm>(V2); - return IA1->getAsmString() == IA2->getAsmString() && - IA1->getConstraintString() == IA2->getConstraintString(); + if (const Constant *C1 = dyn_cast<Constant>(V1)) { + if (V1 == V2) return true; + const Constant *C2 = dyn_cast<Constant>(V2); + if (!C2) return false; + // TODO: constant expressions with GEP or references to F1 or F2. + if (C1->isNullValue() && C2->isNullValue() && + isEquivalentType(C1->getType(), C2->getType())) + return true; + // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 + // then they must have equal bit patterns. + return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && + C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType()); } - unsigned long &ID1 = Map1[V1]; - if (!ID1) - ID1 = ++IDMap1Count; + if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2)) + return V1 == V2; - unsigned long &ID2 = Map2[V2]; - if (!ID2) - ID2 = ++IDMap2Count; + // Check that V1 maps to V2. If we find a value that V1 maps to then we simply + // check whether it's equal to V2. When there is no mapping then we need to + // ensure that V2 isn't already equivalent to something else. For this + // purpose, we track the V2 values in a set. - return ID1 == ID2; + const Value *&map_elem = id_map[V1]; + if (map_elem) + return map_elem == V2; + if (!seen_values.insert(V2).second) + return false; + map_elem = V2; + return true; } -/// Compare - test whether two basic blocks have equivalent behaviour. -bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { +// Test whether two basic blocks have equivalent behaviour. +bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) { BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); do { - if (!Enumerate(F1I, F2I)) + if (!enumerate(F1I, F2I)) return false; if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) { @@ -370,7 +424,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { if (!GEP2) return false; - if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) + if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) return false; if (!isEquivalentGEP(GEP1, GEP2)) @@ -384,7 +438,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { Value *OpF1 = F1I->getOperand(i); Value *OpF2 = F2I->getOperand(i); - if (!Enumerate(OpF1, OpF2)) + if (!enumerate(OpF1, OpF2)) return false; if (OpF1->getValueID() != OpF2->getValueID() || @@ -399,8 +453,8 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { return F1I == F1E && F2I == F2E; } -/// Compare - test whether the two functions have equivalent behaviour. -bool FunctionComparator::Compare() { +// Test whether the two functions have equivalent behaviour. +bool FunctionComparator::compare() { // We need to recheck everything, but check the things that weren't included // in the hash first. @@ -431,14 +485,14 @@ bool FunctionComparator::Compare() { return false; assert(F1->arg_size() == F2->arg_size() && - "Identical functions have a different number of args."); + "Identically typed functions have different numbers of args!"); // Visit the arguments so that they get enumerated in the order they're // passed in. for (Function::const_arg_iterator f1i = F1->arg_begin(), f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) { - if (!Enumerate(f1i, f2i)) - llvm_unreachable("Arguments repeat"); + if (!enumerate(f1i, f2i)) + llvm_unreachable("Arguments repeat!"); } // We do a CFG-ordered walk since the actual ordering of the blocks in the @@ -456,7 +510,7 @@ bool FunctionComparator::Compare() { const BasicBlock *F1BB = F1BBs.pop_back_val(); const BasicBlock *F2BB = F2BBs.pop_back_val(); - if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB)) + if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB)) return false; const TerminatorInst *F1TI = F1BB->getTerminator(); @@ -474,23 +528,190 @@ bool FunctionComparator::Compare() { return true; } -/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace -/// direct uses of G with bitcast(F). -void MergeFunctions::WriteThunk(Function *F, Function *G) const { +namespace { + +/// MergeFunctions finds functions which will generate identical machine code, +/// by considering all pointer types to be equivalent. Once identified, +/// MergeFunctions will fold them by replacing a call to one to a call to a +/// bitcast of the other. +/// +class MergeFunctions : public ModulePass { +public: + static char ID; + MergeFunctions() + : ModulePass(ID), HasGlobalAliases(false) { + initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M); + +private: + typedef DenseSet<ComparableFunction> FnSetType; + + /// A work queue of functions that may have been modified and should be + /// analyzed again. + std::vector<WeakVH> Deferred; + + /// Insert a ComparableFunction into the FnSet, or merge it away if it's + /// equal to one that's already present. + bool insert(ComparableFunction &NewF); + + /// Remove a Function from the FnSet and queue it up for a second sweep of + /// analysis. + void remove(Function *F); + + /// Find the functions that use this Value and remove them from FnSet and + /// queue the functions. + void removeUsers(Value *V); + + /// Replace all direct calls of Old with calls of New. Will bitcast New if + /// necessary to make types match. + void replaceDirectCallers(Function *Old, Function *New); + + /// Merge two equivalent functions. Upon completion, G may be deleted, or may + /// be converted into a thunk. In either case, it should never be visited + /// again. + void mergeTwoFunctions(Function *F, Function *G); + + /// Replace G with a thunk or an alias to F. Deletes G. + void writeThunkOrAlias(Function *F, Function *G); + + /// Replace G with a simple tail call to bitcast(F). Also replace direct uses + /// of G with bitcast(F). Deletes G. + void writeThunk(Function *F, Function *G); + + /// Replace G with an alias to F. Deletes G. + void writeAlias(Function *F, Function *G); + + /// The set of all distinct functions. Use the insert() and remove() methods + /// to modify it. + FnSetType FnSet; + + /// TargetData for more accurate GEP comparisons. May be NULL. + TargetData *TD; + + /// Whether or not the target supports global aliases. + bool HasGlobalAliases; +}; + +} // end anonymous namespace + +char MergeFunctions::ID = 0; +INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) + +ModulePass *llvm::createMergeFunctionsPass() { + return new MergeFunctions(); +} + +bool MergeFunctions::runOnModule(Module &M) { + bool Changed = false; + TD = getAnalysisIfAvailable<TargetData>(); + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) + Deferred.push_back(WeakVH(I)); + } + FnSet.resize(Deferred.size()); + + do { + std::vector<WeakVH> Worklist; + Deferred.swap(Worklist); + + DEBUG(dbgs() << "size of module: " << M.size() << '\n'); + DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); + + // Insert only strong functions and merge them. Strong function merging + // always deletes one of them. + for (std::vector<WeakVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + if (!*I) continue; + Function *F = cast<Function>(*I); + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && + !F->mayBeOverridden()) { + ComparableFunction CF = ComparableFunction(F, TD); + Changed |= insert(CF); + } + } + + // Insert only weak functions and merge them. By doing these second we + // create thunks to the strong function when possible. When two weak + // functions are identical, we create a new strong function with two weak + // weak thunks to it which are identical but not mergable. + for (std::vector<WeakVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + if (!*I) continue; + Function *F = cast<Function>(*I); + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && + F->mayBeOverridden()) { + ComparableFunction CF = ComparableFunction(F, TD); + Changed |= insert(CF); + } + } + DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n'); + } while (!Deferred.empty()); + + FnSet.clear(); + + return Changed; +} + +bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS, + const ComparableFunction &RHS) { + if (LHS.getFunc() == RHS.getFunc() && + LHS.getHash() == RHS.getHash()) + return true; + if (!LHS.getFunc() || !RHS.getFunc()) + return false; + + // One of these is a special "underlying pointer comparison only" object. + if (LHS.getTD() == ComparableFunction::LookupOnly || + RHS.getTD() == ComparableFunction::LookupOnly) + return false; + + assert(LHS.getTD() == RHS.getTD() && + "Comparing functions for different targets"); + + return FunctionComparator(LHS.getTD(), LHS.getFunc(), + RHS.getFunc()).compare(); +} + +// Replace direct callers of Old with New. +void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { + Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE;) { + Value::use_iterator TheIter = UI; + ++UI; + CallSite CS(*TheIter); + if (CS && CS.isCallee(TheIter)) { + remove(CS.getInstruction()->getParent()->getParent()); + TheIter.getUse().set(BitcastNew); + } + } +} + +// Replace G with an alias to F if possible, or else a thunk to F. Deletes G. +void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { + if (HasGlobalAliases && G->hasUnnamedAddr()) { + if (G->hasExternalLinkage() || G->hasLocalLinkage() || + G->hasWeakLinkage()) { + writeAlias(F, G); + return; + } + } + + writeThunk(F, G); +} + +// Replace G with a simple tail call to bitcast(F). Also replace direct uses +// of G with bitcast(F). Deletes G. +void MergeFunctions::writeThunk(Function *F, Function *G) { if (!G->mayBeOverridden()) { // Redirect direct callers of G to F. - Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); - for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); - UI != UE;) { - Value::use_iterator TheIter = UI; - ++UI; - CallSite CS(*TheIter); - if (CS && CS.isCallee(TheIter)) - TheIter.getUse().set(BitcastF); - } + replaceDirectCallers(G, F); } - // If G was internal then we may have replaced all uses if G with F. If so, + // If G was internal then we may have replaced all uses of G with F. If so, // stop here and delete G. There's no need for a thunk. if (G->hasLocalLinkage() && G->use_empty()) { G->eraseFromParent(); @@ -522,131 +743,126 @@ void MergeFunctions::WriteThunk(Function *F, Function *G) const { NewG->copyAttributesFrom(G); NewG->takeName(G); + removeUsers(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); + + DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n'); + ++NumThunksWritten; } -/// MergeTwoFunctions - Merge two equivalent functions. Upon completion, -/// Function G is deleted. -void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const { - if (F->isWeakForLinker()) { - assert(G->isWeakForLinker()); +// Replace G with an alias to F and delete G. +void MergeFunctions::writeAlias(Function *F, Function *G) { + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", + BitcastF, G->getParent()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + removeUsers(G); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); + + DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n'); + ++NumAliasesWritten; +} + +// Merge two equivalent functions. Upon completion, Function G is deleted. +void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { + if (F->mayBeOverridden()) { + assert(G->mayBeOverridden()); + + if (HasGlobalAliases) { + // Make them both thunks to the same internal function. + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + removeUsers(F); + F->replaceAllUsesWith(H); - // Make them both thunks to the same internal function. - Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", - F->getParent()); - H->copyAttributesFrom(F); - H->takeName(F); - F->replaceAllUsesWith(H); + unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); - unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); + writeAlias(F, G); + writeAlias(F, H); - WriteThunk(F, G); - WriteThunk(F, H); + F->setAlignment(MaxAlignment); + F->setLinkage(GlobalValue::PrivateLinkage); + } else { + // We can't merge them. Instead, pick one and update all direct callers + // to call it and hope that we improve the instruction cache hit rate. + replaceDirectCallers(G, F); + } - F->setAlignment(MaxAlignment); - F->setLinkage(GlobalValue::InternalLinkage); + ++NumDoubleWeak; } else { - WriteThunk(F, G); + writeThunkOrAlias(F, G); } ++NumFunctionsMerged; } -static unsigned ProfileFunction(const Function *F) { - const FunctionType *FTy = F->getFunctionType(); - - FoldingSetNodeID ID; - ID.AddInteger(F->size()); - ID.AddInteger(F->getCallingConv()); - ID.AddBoolean(F->hasGC()); - ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(FTy->getReturnType()->getTypeID()); - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(FTy->getParamType(i)->getTypeID()); - return ID.ComputeHash(); -} - -class ComparableFunction { -public: - ComparableFunction(Function *Func, TargetData *TD) - : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {} +// Insert a ComparableFunction into the FnSet, or merge it away if equal to one +// that was already inserted. +bool MergeFunctions::insert(ComparableFunction &NewF) { + std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); + if (Result.second) { + DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); + return false; + } - AssertingVH<Function> const Func; - const unsigned Hash; - TargetData * const TD; -}; + const ComparableFunction &OldF = *Result.first; -struct MergeFunctionsEqualityInfo { - static ComparableFunction *getEmptyKey() { - return reinterpret_cast<ComparableFunction*>(0); - } - static ComparableFunction *getTombstoneKey() { - return reinterpret_cast<ComparableFunction*>(-1); - } - static unsigned getHashValue(const ComparableFunction *CF) { - return CF->Hash; - } - static bool isEqual(const ComparableFunction *LHS, - const ComparableFunction *RHS) { - if (LHS == RHS) - return true; - if (LHS == getEmptyKey() || LHS == getTombstoneKey() || - RHS == getEmptyKey() || RHS == getTombstoneKey()) - return false; - assert(LHS->TD == RHS->TD && "Comparing functions for different targets"); - return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare(); - } -}; + // Never thunk a strong function to a weak function. + assert(!OldF.getFunc()->mayBeOverridden() || + NewF.getFunc()->mayBeOverridden()); -bool MergeFunctions::runOnModule(Module &M) { - typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType; + DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " + << NewF.getFunc()->getName() << '\n'); - bool Changed = false; - TD = getAnalysisIfAvailable<TargetData>(); + Function *DeleteF = NewF.getFunc(); + NewF.release(); + mergeTwoFunctions(OldF.getFunc(), DeleteF); + return true; +} - std::vector<Function *> Funcs; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) - Funcs.push_back(F); +// Remove a function from FnSet. If it was already in FnSet, add it to Deferred +// so that we'll look at it in the next round. +void MergeFunctions::remove(Function *F) { + // We need to make sure we remove F, not a function "equal" to F per the + // function equality comparator. + // + // The special "lookup only" ComparableFunction bypasses the expensive + // function comparison in favour of a pointer comparison on the underlying + // Function*'s. + ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly); + if (FnSet.erase(CF)) { + DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); + Deferred.push_back(F); } +} - bool LocalChanged; - do { - LocalChanged = false; - - FnSetType FnSet; - for (unsigned i = 0, e = Funcs.size(); i != e;) { - Function *F = Funcs[i]; - ComparableFunction *NewF = new ComparableFunction(F, TD); - std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); - if (!Result.second) { - ComparableFunction *&OldF = *Result.first; - assert(OldF && "Expected a hash collision"); - - // NewF will be deleted in favour of OldF unless NewF is strong and - // OldF is weak in which case swap them to keep the strong definition. - - if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker()) - std::swap(OldF, NewF); - - DEBUG(dbgs() << " " << OldF->Func->getName() << " == " - << NewF->Func->getName() << '\n'); - - Funcs.erase(Funcs.begin() + i); - --e; - - Function *DeleteF = NewF->Func; - delete NewF; - MergeTwoFunctions(OldF->Func, DeleteF); - LocalChanged = true; - Changed = true; - } else { - ++i; +// For each instruction used by the value, remove() the function that contains +// the instruction. This should happen right before a call to RAUW. +void MergeFunctions::removeUsers(Value *V) { + std::vector<Value *> Worklist; + Worklist.push_back(V); + while (!Worklist.empty()) { + Value *V = Worklist.back(); + Worklist.pop_back(); + + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + Use &U = UI.getUse(); + if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { + remove(I->getParent()->getParent()); + } else if (isa<GlobalValue>(U.getUser())) { + // do nothing + } else if (Constant *C = dyn_cast<Constant>(U.getUser())) { + for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end(); + CUI != CUE; ++CUI) + Worklist.push_back(*CUI); } } - DeleteContainerPointers(FnSet); - } while (LocalChanged); - - return Changed; + } } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 432f7c5..2afd029 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -30,7 +30,9 @@ namespace { struct PartialInliner : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - PartialInliner() : ModulePass(ID) {} + PartialInliner() : ModulePass(ID) { + initializePartialInlinerPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module& M); @@ -41,7 +43,7 @@ namespace { char PartialInliner::ID = 0; INITIALIZE_PASS(PartialInliner, "partial-inliner", - "Partial Inliner", false, false); + "Partial Inliner", false, false) ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } @@ -67,7 +69,7 @@ Function* PartialInliner::unswitchFunction(Function* F) { return 0; // Clone the function, so that we can hack away on it. - ValueMap<const Value*, Value*> VMap; + ValueToValueMapTy VMap; Function* duplicateFunction = CloneFunction(F, VMap, /*ModuleLevelChanges=*/false); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp deleted file mode 100644 index 4a99a41..0000000 --- a/lib/Transforms/IPO/PartialSpecialization.cpp +++ /dev/null @@ -1,216 +0,0 @@ -//===-- PartialSpecialization.cpp - Specialize for common constants--------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass finds function arguments that are often a common constant and -// specializes a version of the called function for that constant. -// -// This pass simply does the cloning for functions it specializes. It depends -// on IPSCCP and DAE to clean up the results. -// -// The initial heuristic favors constant arguments that are used in control -// flow. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "partialspecialization" -#include "llvm/Transforms/IPO.h" -#include "llvm/Constant.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Support/CallSite.h" -#include "llvm/ADT/DenseSet.h" -#include <map> -using namespace llvm; - -STATISTIC(numSpecialized, "Number of specialized functions created"); -STATISTIC(numReplaced, "Number of callers replaced by specialization"); - -// Maximum number of arguments markable interested -static const int MaxInterests = 6; - -// Call must be used at least occasionally -static const int CallsMin = 5; - -// Must have 10% of calls having the same constant to specialize on -static const double ConstValPercent = .1; - -namespace { - typedef SmallVector<int, MaxInterests> InterestingArgVector; - class PartSpec : public ModulePass { - void scanForInterest(Function&, InterestingArgVector&); - int scanDistribution(Function&, int, std::map<Constant*, int>&); - public : - static char ID; // Pass identification, replacement for typeid - PartSpec() : ModulePass(ID) {} - bool runOnModule(Module &M); - }; -} - -char PartSpec::ID = 0; -INITIALIZE_PASS(PartSpec, "partialspecialization", - "Partial Specialization", false, false); - -// Specialize F by replacing the arguments (keys) in replacements with the -// constants (values). Replace all calls to F with those constants with -// a call to the specialized function. Returns the specialized function -static Function* -SpecializeFunction(Function* F, - ValueMap<const Value*, Value*>& replacements) { - // arg numbers of deleted arguments - DenseMap<unsigned, const Argument*> deleted; - for (ValueMap<const Value*, Value*>::iterator - repb = replacements.begin(), repe = replacements.end(); - repb != repe; ++repb) { - Argument const *arg = cast<const Argument>(repb->first); - deleted[arg->getArgNo()] = arg; - } - - Function* NF = CloneFunction(F, replacements, - /*ModuleLevelChanges=*/false); - NF->setLinkage(GlobalValue::InternalLinkage); - F->getParent()->getFunctionList().push_back(NF); - - for (Value::use_iterator ii = F->use_begin(), ee = F->use_end(); - ii != ee; ) { - Value::use_iterator i = ii; - ++ii; - User *U = *i; - CallSite CS(U); - if (CS) { - if (CS.getCalledFunction() == F) { - SmallVector<Value*, 6> args; - // Assemble the non-specialized arguments for the updated callsite. - // In the process, make sure that the specialized arguments are - // constant and match the specialization. If that's not the case, - // this callsite needs to call the original or some other - // specialization; don't change it here. - CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end(); - for (CallSite::arg_iterator ai = as; ai != ae; ++ai) { - DenseMap<unsigned, const Argument*>::iterator delit = deleted.find( - std::distance(as, ai)); - if (delit == deleted.end()) - args.push_back(cast<Value>(ai)); - else { - Constant *ci = dyn_cast<Constant>(ai); - if (!(ci && ci == replacements[delit->second])) - goto next_use; - } - } - Value* NCall; - if (CallInst *CI = dyn_cast<CallInst>(U)) { - NCall = CallInst::Create(NF, args.begin(), args.end(), - CI->getName(), CI); - cast<CallInst>(NCall)->setTailCall(CI->isTailCall()); - cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv()); - } else { - InvokeInst *II = cast<InvokeInst>(U); - NCall = InvokeInst::Create(NF, II->getNormalDest(), - II->getUnwindDest(), - args.begin(), args.end(), - II->getName(), II); - cast<InvokeInst>(NCall)->setCallingConv(II->getCallingConv()); - } - CS.getInstruction()->replaceAllUsesWith(NCall); - CS.getInstruction()->eraseFromParent(); - ++numReplaced; - } - } - next_use:; - } - return NF; -} - - -bool PartSpec::runOnModule(Module &M) { - bool Changed = false; - for (Module::iterator I = M.begin(); I != M.end(); ++I) { - Function &F = *I; - if (F.isDeclaration() || F.mayBeOverridden()) continue; - InterestingArgVector interestingArgs; - scanForInterest(F, interestingArgs); - - // Find the first interesting Argument that we can specialize on - // If there are multiple interesting Arguments, then those will be found - // when processing the cloned function. - bool breakOuter = false; - for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) { - std::map<Constant*, int> distribution; - int total = scanDistribution(F, interestingArgs[x], distribution); - if (total > CallsMin) - for (std::map<Constant*, int>::iterator ii = distribution.begin(), - ee = distribution.end(); ii != ee; ++ii) - if (total > ii->second && ii->first && - ii->second > total * ConstValPercent) { - ValueMap<const Value*, Value*> m; - Function::arg_iterator arg = F.arg_begin(); - for (int y = 0; y < interestingArgs[x]; ++y) - ++arg; - m[&*arg] = ii->first; - SpecializeFunction(&F, m); - ++numSpecialized; - breakOuter = true; - Changed = true; - } - } - } - return Changed; -} - -/// scanForInterest - This function decides which arguments would be worth -/// specializing on. -void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) { - for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end(); - ii != ee; ++ii) { - for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end(); - ui != ue; ++ui) { - - bool interesting = false; - User *U = *ui; - if (isa<CmpInst>(U)) interesting = true; - else if (isa<CallInst>(U)) - interesting = ui->getOperand(0) == ii; - else if (isa<InvokeInst>(U)) - interesting = ui->getOperand(0) == ii; - else if (isa<SwitchInst>(U)) interesting = true; - else if (isa<BranchInst>(U)) interesting = true; - - if (interesting) { - args.push_back(std::distance(F.arg_begin(), ii)); - break; - } - } - } -} - -/// scanDistribution - Construct a histogram of constants for arg of F at arg. -int PartSpec::scanDistribution(Function& F, int arg, - std::map<Constant*, int>& dist) { - bool hasIndirect = false; - int total = 0; - for (Value::use_iterator ii = F.use_begin(), ee = F.use_end(); - ii != ee; ++ii) { - User *U = *ii; - CallSite CS(U); - if (CS && CS.getCalledFunction() == &F) { - ++dist[dyn_cast<Constant>(CS.getArgument(arg))]; - ++total; - } else - hasIndirect = true; - } - - // Preserve the original address taken function even if all other uses - // will be specialized. - if (hasIndirect) ++total; - return total; -} - -ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); } diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 09ac76f..d91c2c4 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -37,7 +37,9 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized"); namespace { struct PruneEH : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - PruneEH() : CallGraphSCCPass(ID) {} + PruneEH() : CallGraphSCCPass(ID) { + initializePruneEHPass(*PassRegistry::getPassRegistry()); + } // runOnSCC - Analyze the SCC, performing the transformation if possible. bool runOnSCC(CallGraphSCC &SCC); @@ -48,8 +50,11 @@ namespace { } char PruneEH::ID = 0; -INITIALIZE_PASS(PruneEH, "prune-eh", - "Remove unused exception handling info", false, false); +INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) Pass *llvm::createPruneEHPass() { return new PruneEH(); } diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp index ee10ad0..b5f09ec 100644 --- a/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -29,7 +29,9 @@ namespace { class StripDeadPrototypesPass : public ModulePass { public: static char ID; // Pass identification, replacement for typeid - StripDeadPrototypesPass() : ModulePass(ID) { } + StripDeadPrototypesPass() : ModulePass(ID) { + initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); }; @@ -37,7 +39,7 @@ public: char StripDeadPrototypesPass::ID = 0; INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes", - "Strip Unused Function Prototypes", false, false); + "Strip Unused Function Prototypes", false, false) bool StripDeadPrototypesPass::runOnModule(Module &M) { bool MadeChange = false; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 20b7b8f..a690765 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -39,7 +39,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripSymbols(bool ODI = false) - : ModulePass(ID), OnlyDebugInfo(ODI) {} + : ModulePass(ID), OnlyDebugInfo(ODI) { + initializeStripSymbolsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -52,7 +54,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripNonDebugSymbols() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -65,7 +69,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripDebugDeclare() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -78,7 +84,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripDeadDebugInfo() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -90,7 +98,7 @@ namespace { char StripSymbols::ID = 0; INITIALIZE_PASS(StripSymbols, "strip", - "Strip all symbols from a module", false, false); + "Strip all symbols from a module", false, false) ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { return new StripSymbols(OnlyDebugInfo); @@ -99,7 +107,7 @@ ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { char StripNonDebugSymbols::ID = 0; INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug", "Strip all symbols, except dbg symbols, from a module", - false, false); + false, false) ModulePass *llvm::createStripNonDebugSymbolsPass() { return new StripNonDebugSymbols(); @@ -107,7 +115,7 @@ ModulePass *llvm::createStripNonDebugSymbolsPass() { char StripDebugDeclare::ID = 0; INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare", - "Strip all llvm.dbg.declare intrinsics", false, false); + "Strip all llvm.dbg.declare intrinsics", false, false) ModulePass *llvm::createStripDebugDeclarePass() { return new StripDebugDeclare(); @@ -115,7 +123,7 @@ ModulePass *llvm::createStripDebugDeclarePass() { char StripDeadDebugInfo::ID = 0; INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info", - "Strip debug info for unused symbols", false, false); + "Strip debug info for unused symbols", false, false) ModulePass *llvm::createStripDeadDebugInfoPass() { return new StripDeadDebugInfo(); diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index b82b03f..584deac 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -50,7 +50,9 @@ namespace { virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid - SRETPromotion() : CallGraphSCCPass(ID) {} + SRETPromotion() : CallGraphSCCPass(ID) { + initializeSRETPromotionPass(*PassRegistry::getPassRegistry()); + } private: CallGraphNode *PromoteReturn(CallGraphNode *CGN); @@ -61,8 +63,11 @@ namespace { } char SRETPromotion::ID = 0; -INITIALIZE_PASS(SRETPromotion, "sretpromotion", - "Promote sret arguments to multiple ret values", false, false); +INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion", + "Promote sret arguments to multiple ret values", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(SRETPromotion, "sretpromotion", + "Promote sret arguments to multiple ret values", false, false) Pass *llvm::createStructRetPromotionPass() { return new SRETPromotion(); |