Diffstat (limited to 'contrib/llvm/lib/Transforms')
89 files changed, 2705 insertions, 3204 deletions
diff --git a/contrib/llvm/lib/Transforms/Hello/Hello.cpp b/contrib/llvm/lib/Transforms/Hello/Hello.cpp
index abfa514..838d550 100644
--- a/contrib/llvm/lib/Transforms/Hello/Hello.cpp
+++ b/contrib/llvm/lib/Transforms/Hello/Hello.cpp
@@ -25,7 +25,7 @@ namespace {
   // Hello - The first implementation, without getAnalysisUsage.
   struct Hello : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    Hello() : FunctionPass(&ID) {}
+    Hello() : FunctionPass(ID) {}

     virtual bool runOnFunction(Function &F) {
       ++HelloCounter;
@@ -37,13 +37,13 @@ namespace {
 }

 char Hello::ID = 0;
-static RegisterPass<Hello> X("hello", "Hello World Pass");
+INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false);

 namespace {
   // Hello2 - The second implementation with getAnalysisUsage implemented.
   struct Hello2 : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    Hello2() : FunctionPass(&ID) {}
+    Hello2() : FunctionPass(ID) {}

     virtual bool runOnFunction(Function &F) {
       ++HelloCounter;
@@ -60,5 +60,6 @@ namespace {
 }

 char Hello2::ID = 0;
-static RegisterPass<Hello2>
-Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
+INITIALIZE_PASS(Hello2, "hello2",
+                "Hello World Pass (with getAnalysisUsage implemented)",
+                false, false);
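The hunks above show the two mechanical changes repeated throughout this commit: pass constructors now pass the ID char by reference instead of by address, and the static RegisterPass<> objects become INITIALIZE_PASS macros. A minimal sketch of the new idiom, assuming the 2.8-era llvm/Pass.h; CountBlocks and "countblocks" are made-up names:

    #include "llvm/Pass.h"
    #include "llvm/Function.h"

    using namespace llvm;

    namespace {
      struct CountBlocks : public FunctionPass {
        static char ID;             // the framework keys off this object's address
        CountBlocks() : FunctionPass(ID) {}  // char& now, not &ID
        virtual bool runOnFunction(Function &F) {
          (void)F.size();           // number of basic blocks; analysis only
          return false;             // IR not modified
        }
      };
    }

    char CountBlocks::ID = 0;
    // Replaces: static RegisterPass<CountBlocks> X("countblocks", ...);
    // The two booleans are "only looks at CFG" and "is analysis".
    INITIALIZE_PASS(CountBlocks, "countblocks", "Count basic blocks",
                    false, false);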
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 28ea079..0c77e1f 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -67,7 +67,7 @@ namespace {
     virtual bool runOnSCC(CallGraphSCC &SCC);
     static char ID; // Pass identification, replacement for typeid
     explicit ArgPromotion(unsigned maxElements = 3)
-      : CallGraphSCCPass(&ID), maxElements(maxElements) {}
+      : CallGraphSCCPass(ID), maxElements(maxElements) {}

     /// A vector used to hold the indices of a single GEP instruction
     typedef std::vector<uint64_t> IndicesVector;
@@ -84,8 +84,8 @@ namespace {
 }

 char ArgPromotion::ID = 0;
-static RegisterPass<ArgPromotion>
-X("argpromotion", "Promote 'by reference' arguments to scalars");
+INITIALIZE_PASS(ArgPromotion, "argpromotion",
+                "Promote 'by reference' arguments to scalars", false, false);

 Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
   return new ArgPromotion(maxElements);
@@ -208,8 +208,8 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
   // have direct callees.
   for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
        UI != E; ++UI) {
-    CallSite CS = CallSite::get(*UI);
-    assert(CS.getInstruction() && "Should only have direct calls!");
+    CallSite CS(*UI);
+    assert(CS && "Should only have direct calls!");

     if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
       return false;
@@ -619,14 +619,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // Get a new callgraph node for NF.
   CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);

-
   // Loop over all of the callers of the function, transforming the call sites
   // to pass in the loaded pointers.
   //
   SmallVector<Value*, 16> Args;
   while (!F->use_empty()) {
-    CallSite CS = CallSite::get(F->use_back());
+    CallSite CS(F->use_back());
     assert(CS.getCalledFunction() == F);
     Instruction *Call = CS.getInstruction();
     const AttrListPtr &CallPAL = CS.getAttributes();
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 3c05f88..64e8d79 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,10 +19,12 @@
 #define DEBUG_TYPE "constmerge"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;

@@ -31,7 +33,7 @@ STATISTIC(NumMerged, "Number of global constants merged");
 namespace {
   struct ConstantMerge : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    ConstantMerge() : ModulePass(&ID) {}
+    ConstantMerge() : ModulePass(ID) {}

     // run - For this pass, process all of the globals in the module,
     // eliminating duplicate constants.
@@ -41,12 +43,32 @@ namespace {
 }

 char ConstantMerge::ID = 0;
-static RegisterPass<ConstantMerge>
-X("constmerge", "Merge Duplicate Global Constants");
+INITIALIZE_PASS(ConstantMerge, "constmerge",
+                "Merge Duplicate Global Constants", false, false);

 ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+                           SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+  if (LLVMUsed == 0) return;
+  ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+  if (Inits == 0) return;
+
+  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+    if (GlobalValue *GV =
+        dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+      UsedValues.insert(GV);
+}

 bool ConstantMerge::runOnModule(Module &M) {
+  // Find all the globals that are marked "used".  These cannot be merged.
+  SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+  FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+  FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
   // Map unique constant/section pairs to globals.  We don't want to merge
   // globals in different sections.
   DenseMap<Constant*, GlobalVariable*> CMap;
@@ -79,9 +101,13 @@ bool ConstantMerge::runOnModule(Module &M) {

       // Only process constants with initializers in the default addres space.
       if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
-          GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty())
+          GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+          // Don't touch values marked with attribute(used).
+          UsedGlobals.count(GV))
         continue;
+
+
       Constant *Init = GV->getInitializer();

       // Check to see if the initializer is already known.
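The ArgumentPromotion and ConstantMerge hunks above (and most files below) also migrate from CallSite::get(V) to direct CallSite construction. The constructor accepts an arbitrary Value and produces a null CallSite when the value is not a call or invoke, so the old getInstruction() null check collapses to a plain boolean test. A sketch of the new idiom against the 2.8-era llvm/Support/CallSite.h; countDirectCalls is a hypothetical helper:

    #include "llvm/Function.h"
    #include "llvm/Support/CallSite.h"

    using namespace llvm;

    // Count the direct calls to F among its users.
    static unsigned countDirectCalls(Function *F) {
      unsigned N = 0;
      for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
           UI != E; ++UI) {
        CallSite CS(*UI);            // was: CallSite CS = CallSite::get(*UI);
        if (CS && CS.getCalledFunction() == F)  // was: CS.getInstruction() && ...
          ++N;
      }
      return N;
    }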
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 475eee8..47df235 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -122,11 +122,11 @@ namespace {
   protected:
     // DAH uses this to specify a different ID.
-    explicit DAE(void *ID) : ModulePass(ID) {}
+    explicit DAE(char &ID) : ModulePass(ID) {}

   public:
     static char ID; // Pass identification, replacement for typeid
-    DAE() : ModulePass(&ID) {}
+    DAE() : ModulePass(ID) {}

     bool runOnModule(Module &M);

@@ -151,8 +151,7 @@ namespace {
 }

 char DAE::ID = 0;
-static RegisterPass<DAE>
-X("deadargelim", "Dead Argument Elimination");
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);

 namespace {
   /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
@@ -160,15 +159,16 @@ namespace {
   /// by bugpoint.
   struct DAH : public DAE {
     static char ID;
-    DAH() : DAE(&ID) {}
+    DAH() : DAE(ID) {}

     virtual bool ShouldHackArguments() const { return true; }
   };
 }

 char DAH::ID = 0;
-static RegisterPass<DAH>
-Y("deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)");
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+                "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+                false, false);

 /// createDeadArgEliminationPass - This pass removes arguments from functions
 /// which are not used by the body of the function.
@@ -220,11 +220,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   //
   std::vector<Value*> Args;
   while (!Fn.use_empty()) {
-    CallSite CS = CallSite::get(Fn.use_back());
+    CallSite CS(Fn.use_back());
     Instruction *Call = CS.getInstruction();

     // Pass all the same arguments.
-    Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs);
+    Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);

     // Drop any attributes that were on the vararg arguments.
     AttrListPtr PAL = CS.getAttributes();
@@ -250,8 +250,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
       if (cast<CallInst>(Call)->isTailCall())
         cast<CallInst>(New)->setTailCall();
     }
-    if (MDNode *N = Call->getDbgMetadata())
-      New->setDbgMetadata(N);
+    New->setDebugLoc(Call->getDebugLoc());

     Args.clear();

@@ -725,7 +724,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   //
   std::vector<Value*> Args;
   while (!F->use_empty()) {
-    CallSite CS = CallSite::get(F->use_back());
+    CallSite CS(F->use_back());
     Instruction *Call = CS.getInstruction();

     AttributesVec.clear();
@@ -780,8 +779,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       if (cast<CallInst>(Call)->isTailCall())
         cast<CallInst>(New)->setTailCall();
     }
-    if (MDNode *N = Call->getDbgMetadata())
-      New->setDbgMetadata(N);
+    New->setDebugLoc(Call->getDebugLoc());

     Args.clear();
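The two hunks above that rewrite calls also replace hand-copying of the "dbg" MDNode with the DebugLoc API, which carries the source position across in one call. A one-line sketch of the same propagation, assuming the 2.8-era Instruction interface; copyLocation is a hypothetical helper:

    #include "llvm/Instructions.h"

    using namespace llvm;

    // Propagate the source location when one instruction replaces another.
    static void copyLocation(Instruction *Old, Instruction *New) {
      // was: if (MDNode *N = Old->getDbgMetadata()) New->setDbgMetadata(N);
      New->setDebugLoc(Old->getDebugLoc());  // copying an unset DebugLoc is harmless
    }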
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
index 662fbb5..5dc50c5 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -26,7 +26,7 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
 namespace {
   struct DTE : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    DTE() : ModulePass(&ID) {}
+    DTE() : ModulePass(ID) {}

     // doPassInitialization - For this pass, it removes global symbol table
     // entries for primitive types.  These are never used for linking in GCC and
@@ -45,7 +45,7 @@ namespace {
 }

 char DTE::ID = 0;
-static RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination");
+INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);

 ModulePass *llvm::createDeadTypeEliminationPass() {
   return new DTE();
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 7f67e48..45c5fe7 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -17,15 +17,15 @@
 #include "llvm/Pass.h"
 #include "llvm/Constants.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
 #include <algorithm>
 using namespace llvm;

 namespace {
   /// @brief A pass to extract specific functions and their dependencies.
   class GVExtractorPass : public ModulePass {
-    std::vector<GlobalValue*> Named;
+    SetVector<GlobalValue *> Named;
     bool deleteStuff;
-    bool reLink;
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -33,135 +33,42 @@ namespace {
     /// specified function. Otherwise, it deletes as much of the module as
     /// possible, except for the function specified.
     ///
-    explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true,
-                             bool relinkCallees = false)
-      : ModulePass(&ID), Named(GVs), deleteStuff(deleteS),
-        reLink(relinkCallees) {}
+    explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+      : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}

     bool runOnModule(Module &M) {
-      if (Named.size() == 0) {
-        return false;  // Nothing to extract
-      }
-
-
-      if (deleteStuff)
-        return deleteGV();
-      M.setModuleInlineAsm("");
-      return isolateGV(M);
-    }
-
-    bool deleteGV() {
-      for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
-             GE = Named.end(); GI != GE; ++GI) {
-        if (Function* NamedFunc = dyn_cast<Function>(*GI)) {
-          // If we're in relinking mode, set linkage of all internal callees to
-          // external. This will allow us extract function, and then - link
-          // everything together
-          if (reLink) {
-            for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end();
-                 B != BE; ++B) {
-              for (BasicBlock::iterator I = B->begin(), E = B->end();
-                   I != E; ++I) {
-                if (CallInst* callInst = dyn_cast<CallInst>(&*I)) {
-                  Function* Callee = callInst->getCalledFunction();
-                  if (Callee && Callee->hasLocalLinkage())
-                    Callee->setLinkage(GlobalValue::ExternalLinkage);
-                }
-              }
-            }
-          }
-
-          NamedFunc->setLinkage(GlobalValue::ExternalLinkage);
-          NamedFunc->deleteBody();
-          assert(NamedFunc->isDeclaration() && "This didn't make the function external!");
-        } else {
-          if (!(*GI)->isDeclaration()) {
-            cast<GlobalVariable>(*GI)->setInitializer(0);  //clear the initializer
-            (*GI)->setLinkage(GlobalValue::ExternalLinkage);
-          }
-        }
-      }
-      return true;
-    }
-
-    bool isolateGV(Module &M) {
-      // Mark all globals internal
-      // FIXME: what should we do with private linkage?
-      for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
+      // Visit the global inline asm.
+      if (!deleteStuff)
+        M.setModuleInlineAsm("");
+
+      // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+      // implementation could figure out which GlobalValues are actually
+      // referenced by the Named set, and which GlobalValues in the rest of
+      // the module are referenced by the NamedSet, and get away with leaving
+      // more internal and private things internal and private. But for now,
+      // be conservative and simple.
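The loops that follow implement that comment with a single test per global: since Named.count(GV) is 0 or 1, the condition `deleteStuff == Named.count(GV)` holds exactly when we are deleting a named value or extracting an unnamed one — the two cases where the body or initializer must be dropped. The trick in isolation, as a standalone hypothetical sketch:

    // deleteStuff == isNamed is an XNOR: drop the body when
    // (deleting && named) or (extracting && !named).
    static bool shouldDropBody(bool deleteStuff, bool isNamed) {
      return deleteStuff == isNamed;
    }
    // shouldDropBody(true,  true)  == true   (delete mode, named: drop)
    // shouldDropBody(false, false) == true   (extract mode, unnamed: drop)
    // shouldDropBody(false, true)  == false  (extract mode, named: keep)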
+
+      // Visit the GlobalVariables.
+      for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+           I != E; ++I)
         if (!I->isDeclaration()) {
-          I->setLinkage(GlobalValue::InternalLinkage);
+          if (I->hasLocalLinkage())
+            I->setVisibility(GlobalValue::HiddenVisibility);
+          I->setLinkage(GlobalValue::ExternalLinkage);
+          if (deleteStuff == Named.count(I))
+            I->setInitializer(0);
         }
+
+      // Visit the Functions.
       for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
         if (!I->isDeclaration()) {
-          I->setLinkage(GlobalValue::InternalLinkage);
-        }
-
-      // Make sure our result is globally accessible...
-      // by putting them in the used array
-      {
-        std::vector<Constant *> AUGs;
-        const Type *SBP=
-          Type::getInt8PtrTy(M.getContext());
-        for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
-               GE = Named.end(); GI != GE; ++GI) {
-          (*GI)->setLinkage(GlobalValue::ExternalLinkage);
-          AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP));
-        }
-        ArrayType *AT = ArrayType::get(SBP, AUGs.size());
-        Constant *Init = ConstantArray::get(AT, AUGs);
-        GlobalValue *gv = new GlobalVariable(M, AT, false,
-                                             GlobalValue::AppendingLinkage,
-                                             Init, "llvm.used");
-        gv->setSection("llvm.metadata");
-      }
-
-      // All of the functions may be used by global variables or the named
-      // globals.  Loop through them and create a new, external functions that
-      // can be "used", instead of ones with bodies.
-      std::vector<Function*> NewFunctions;
-
-      Function *Last = --M.end();  // Figure out where the last real fn is.
-
-      for (Module::iterator I = M.begin(); ; ++I) {
-        if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
-          Function *New = Function::Create(I->getFunctionType(),
-                                           GlobalValue::ExternalLinkage);
-          New->copyAttributesFrom(I);
-
-          // If it's not the named function, delete the body of the function
-          I->dropAllReferences();
-
-          M.getFunctionList().push_back(New);
-          NewFunctions.push_back(New);
-          New->takeName(I);
+          if (I->hasLocalLinkage())
+            I->setVisibility(GlobalValue::HiddenVisibility);
+          I->setLinkage(GlobalValue::ExternalLinkage);
+          if (deleteStuff == Named.count(I))
+            I->deleteBody();
         }
-        if (&*I == Last) break;  // Stop after processing the last function
-      }
-
-      // Now that we have replacements all set up, loop through the module,
-      // deleting the old functions, replacing them with the newly created
-      // functions.
-      if (!NewFunctions.empty()) {
-        unsigned FuncNum = 0;
-        Module::iterator I = M.begin();
-        do {
-          if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
-            // Make everything that uses the old function use the new dummy fn
-            I->replaceAllUsesWith(NewFunctions[FuncNum++]);
-
-            Function *Old = I;
-            ++I;  // Move the iterator to the new function
-
-            // Delete the old function!
-            M.getFunctionList().erase(Old);
-
-          } else {
-            ++I;  // Skip the function we are extracting
-          }
-        } while (&*I != NewFunctions[0]);
-      }
-
       return true;
     }
   };
@@ -170,6 +77,6 @@ namespace {
 }

 ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
-                                         bool deleteFn, bool relinkCallees) {
-  return new GVExtractorPass(GVs, deleteFn, relinkCallees);
+                                         bool deleteFn) {
+  return new GVExtractorPass(GVs, deleteFn);
 }
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 9bd7af6..6165ba0 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -41,7 +41,7 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
 namespace {
   struct FunctionAttrs : public CallGraphSCCPass {
     static char ID; // Pass identification, replacement for typeid
-    FunctionAttrs() : CallGraphSCCPass(&ID) {}
+    FunctionAttrs() : CallGraphSCCPass(ID) {}

     // runOnSCC - Analyze the SCC, performing the transformation if possible.
     bool runOnSCC(CallGraphSCC &SCC);
@@ -69,8 +69,8 @@ namespace {
 }

 char FunctionAttrs::ID = 0;
-static RegisterPass<FunctionAttrs>
-X("functionattrs", "Deduce function attributes");
+INITIALIZE_PASS(FunctionAttrs, "functionattrs",
+                "Deduce function attributes", false, false);

 Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }

@@ -162,14 +162,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
       // Some instructions can be ignored even if they read or write memory.
       // Detect these now, skipping to the next instruction if one is found.
-      CallSite CS = CallSite::get(I);
-      if (CS.getInstruction() && CS.getCalledFunction()) {
+      CallSite CS(cast<Value>(I));
+      if (CS && CS.getCalledFunction()) {
         // Ignore calls to functions in the same SCC.
         if (SCCNodes.count(CS.getCalledFunction()))
           continue;
         // Ignore intrinsics that only access local memory.
         if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
-          if (AliasAnalysis::getModRefBehavior(id) ==
+          if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
              AliasAnalysis::AccessesArguments) {
             // Check that all pointer arguments point to local memory.
             for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
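AddReadAttrs above now calls the renamed static hook getIntrinsicModRefBehavior. A sketch of the check it performs before it goes on to prove the pointer arguments are function-local, assuming the 2.8-era AliasAnalysis interface; onlyAccessesItsArguments is a hypothetical helper:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Function.h"
    #include "llvm/Support/CallSite.h"

    using namespace llvm;

    // Is this a direct intrinsic call that only touches memory reachable
    // from its pointer arguments (e.g. memcpy)?
    static bool onlyAccessesItsArguments(CallSite CS) {
      Function *Callee = CS.getCalledFunction();
      if (!Callee) return false;
      unsigned IID = Callee->getIntrinsicID();
      if (!IID) return false;
      // was: AliasAnalysis::getModRefBehavior(IID)
      return AliasAnalysis::getIntrinsicModRefBehavior(IID) ==
             AliasAnalysis::AccessesArguments;
    }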
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 44216a6..aa18601 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -31,7 +31,7 @@ STATISTIC(NumVariables, "Number of global variables removed");
 namespace {
   struct GlobalDCE : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    GlobalDCE() : ModulePass(&ID) {}
+    GlobalDCE() : ModulePass(ID) {}

     // run - Do the GlobalDCE pass on the specified module, optionally updating
     // the specified callgraph to reflect the changes.
@@ -51,7 +51,8 @@ namespace {
 }

 char GlobalDCE::ID = 0;
-static RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination");
+INITIALIZE_PASS(GlobalDCE, "globaldce",
+                "Dead Global Elimination", false, false);

 ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 735a1c4..a77af54 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -59,7 +59,7 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     }
     static char ID; // Pass identification, replacement for typeid
-    GlobalOpt() : ModulePass(&ID) {}
+    GlobalOpt() : ModulePass(ID) {}

     bool runOnModule(Module &M);

@@ -74,7 +74,8 @@ namespace {
 }

 char GlobalOpt::ID = 0;
-static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");
+INITIALIZE_PASS(GlobalOpt, "globalopt",
+                "Global Variable Optimizer", false, false);

 ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }

@@ -1467,7 +1468,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                                TargetData *TD) {
   if (!TD)
     return false;
-
+
   // If this is a malloc of an abstract type, don't touch it.
   if (!AllocTy->isSized())
     return false;
@@ -2077,7 +2078,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
       return false;

     // The first index must be zero.
-    ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin()));
+    ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
     if (!CI || !CI->isZero()) return false;

     // The remaining indices must be compile-time known integers within the
@@ -2302,7 +2303,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
         if (isa<InlineAsm>(CI->getCalledValue())) return false;

         // Resolve function pointers.
-        Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue()));
+        Function *Callee = dyn_cast<Function>(getVal(Values,
+                                                     CI->getCalledValue()));
         if (!Callee) return false;  // Cannot resolve.

         SmallVector<Constant*, 8> Formals;
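GlobalOpt's change from next() to llvm::next() is a disambiguation fix rather than a behavioural one. A sketch of why the qualification matters, assuming llvm/ADT/STLExtras.h; secondElement is a hypothetical helper:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    // llvm::next copies the iterator and advances the copy. The qualification
    // matters because an unqualified next(It) on a std:: iterator can also
    // find C++0x std::next via argument-dependent lookup, making the call
    // ambiguous once <iterator> declares it.
    static int secondElement(const std::vector<int> &V) {
      std::vector<int>::const_iterator I = V.begin();
      return *llvm::next(I);  // unambiguous even with std::next visible
    }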
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index e4db235..1b3cf78 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -35,7 +35,7 @@ namespace {
   ///
   struct IPCP : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    IPCP() : ModulePass(&ID) {}
+    IPCP() : ModulePass(ID) {}

     bool runOnModule(Module &M);
   private:
@@ -45,8 +45,8 @@ namespace {
 }

 char IPCP::ID = 0;
-static RegisterPass<IPCP>
-X("ipconstprop", "Interprocedural constant propagation");
+INITIALIZE_PASS(IPCP, "ipconstprop",
+                "Interprocedural constant propagation", false, false);

 ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }

@@ -94,7 +94,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
     if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
       return false;
-    CallSite CS = CallSite::get(cast<Instruction>(U));
+    CallSite CS(cast<Instruction>(U));
     if (!CS.isCallee(UI))
       return false;

@@ -219,7 +219,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
   // constant.
   bool MadeChange = false;
   for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
-    CallSite CS = CallSite::get(*UI);
+    CallSite CS(*UI);
     Instruction* Call = CS.getInstruction();

     // Not a call instruction or a call instruction that's not calling F
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index 8e312e7..ecc60ad 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -36,7 +36,7 @@ namespace {
     InlineCostAnalyzer CA;
   public:
     // Use extremely low threshold.
-    AlwaysInliner() : Inliner(&ID, -2000000000) {}
+    AlwaysInliner() : Inliner(ID, -2000000000) {}
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, NeverInline);
@@ -61,8 +61,8 @@ namespace {
 }

 char AlwaysInliner::ID = 0;
-static RegisterPass<AlwaysInliner>
-X("always-inline", "Inliner for always_inline functions");
+INITIALIZE_PASS(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false);

 Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 74b4a1c..9c6637d 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -33,8 +33,8 @@ namespace {
     SmallPtrSet<const Function*, 16> NeverInline;
     InlineCostAnalyzer CA;
   public:
-    SimpleInliner() : Inliner(&ID) {}
-    SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {}
+    SimpleInliner() : Inliner(ID) {}
+    SimpleInliner(int Threshold) : Inliner(ID, Threshold) {}
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, NeverInline);
@@ -56,8 +56,8 @@ namespace {
 }

 char SimpleInliner::ID = 0;
-static RegisterPass<SimpleInliner>
-X("inline", "Function Integration/Inlining");
+INITIALIZE_PASS(SimpleInliner, "inline",
+                "Function Integration/Inlining", false, false);

 Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
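The Inliner hunks below scan instruction streams with CallSite(cast<Value>(I)); the cast is needed because a BasicBlock::iterator converts to Instruction* while the CallSite constructor takes a Value*. A sketch of the scanning pattern, assuming the 2.8-era headers; collectCallSites is a hypothetical helper:

    #include "llvm/Function.h"
    #include "llvm/IntrinsicInst.h"
    #include "llvm/Support/CallSite.h"
    #include "llvm/ADT/SmallVector.h"

    using namespace llvm;

    // Gather every call site in F that could plausibly be inlined.
    static void collectCallSites(Function &F, SmallVectorImpl<CallSite> &Sites) {
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
          CallSite CS(cast<Value>(I));   // null CallSite for non-calls
          // Skip non-calls and intrinsics; neither can ever be inlined.
          if (!CS || isa<IntrinsicInst>(I))
            continue;
          Sites.push_back(CS);
        }
    }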
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 9bb01f5..4983e8e 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -48,10 +48,10 @@ HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
 // Threshold to use when optsize is specified (and there is no -inline-limit).
 const int OptSizeThreshold = 75;

-Inliner::Inliner(void *ID)
+Inliner::Inliner(char &ID)
   : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}

-Inliner::Inliner(void *ID, int Threshold)
+Inliner::Inliner(char &ID, int Threshold)
   : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}

 /// getAnalysisUsage - For this class, we declare that we require and preserve
@@ -238,11 +238,11 @@ bool Inliner::shouldInline(CallSite CS) {
     bool someOuterCallWouldNotBeInlined = false;
     for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
          I != E; ++I) {
-      CallSite CS2 = CallSite::get(*I);
+      CallSite CS2(*I);

       // If this isn't a call to Caller (it could be some other sort
       // of reference) skip it.
-      if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
+      if (!CS2 || CS2.getCalledFunction() != Caller)
         continue;

       InlineCost IC2 = getInlineCost(CS2);
@@ -334,10 +334,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-        CallSite CS = CallSite::get(I);
+        CallSite CS(cast<Value>(I));

         // If this isn't a call, or it is a call to an intrinsic, it can
         // never be inlined.
-        if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+        if (!CS || isa<IntrinsicInst>(I))
           continue;

         // If this is a direct call to an external function, we can never inline
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index 47abb7d..a1d919f 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -63,11 +63,11 @@ namespace {
 } // end anonymous namespace

 char InternalizePass::ID = 0;
-static RegisterPass<InternalizePass>
-X("internalize", "Internalize Global Symbols");
+INITIALIZE_PASS(InternalizePass, "internalize",
+                "Internalize Global Symbols", false, false);

 InternalizePass::InternalizePass(bool AllButMain)
-  : ModulePass(&ID), AllButMain(AllButMain){
+  : ModulePass(ID), AllButMain(AllButMain){
   if (!APIFile.empty())           // If a filename is specified, use it.
     LoadFile(APIFile.c_str());
   if (!APIList.empty())           // If a list is specified, use it as well.
@@ -75,7 +75,7 @@ InternalizePass::InternalizePass(bool AllButMain)
 }

 InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
-  : ModulePass(&ID), AllButMain(false){
+  : ModulePass(ID), AllButMain(false){
   for(std::vector<const char *>::const_iterator itr = exportList.begin();
         itr != exportList.end(); itr++) {
     ExternalNames.insert(*itr);
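Both InternalizePass constructors keep working through the public factory. A usage sketch of the export-list form, assuming the 2.8-era createInternalizePass declaration in llvm/Transforms/IPO.h; my_entry is a hypothetical exported symbol:

    #include "llvm/Transforms/IPO.h"
    #include "llvm/PassManager.h"
    #include <vector>

    using namespace llvm;

    // Everything except the listed symbols gets internal linkage, which
    // lets later whole-program passes (globaldce etc.) prune aggressively.
    static void addInternalize(PassManager &PM) {
      std::vector<const char *> Exports;
      Exports.push_back("main");
      Exports.push_back("my_entry");  // hypothetical entry point
      PM.add(createInternalizePass(Exports));
    }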
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index cb81330..f88dff6 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -37,7 +37,7 @@ namespace {
     unsigned NumLoops;

     explicit LoopExtractor(unsigned numLoops = ~0)
-      : LoopPass(&ID), NumLoops(numLoops) {}
+      : LoopPass(ID), NumLoops(numLoops) {}

     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);

@@ -50,8 +50,8 @@ namespace {
 }

 char LoopExtractor::ID = 0;
-static RegisterPass<LoopExtractor>
-X("loop-extract", "Extract loops into new functions");
+INITIALIZE_PASS(LoopExtractor, "loop-extract",
+                "Extract loops into new functions", false, false);

 namespace {
   /// SingleLoopExtractor - For bugpoint.
@@ -62,8 +62,8 @@ namespace {
 } // End anonymous namespace

 char SingleLoopExtractor::ID = 0;
-static RegisterPass<SingleLoopExtractor>
-Y("loop-extract-single", "Extract at most one loop into a new function");
+INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
+                "Extract at most one loop into a new function", false, false);

 // createLoopExtractorPass - This pass extracts all natural loops from the
 // program into a function if it can.
@@ -147,27 +147,26 @@ namespace {
     std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit BlockExtractorPass(const std::vector<BasicBlock*> &B)
-      : ModulePass(&ID), BlocksToNotExtract(B) {
+    BlockExtractorPass() : ModulePass(ID) {
       if (!BlockFile.empty())
         LoadFile(BlockFile.c_str());
     }
-    BlockExtractorPass() : ModulePass(&ID) {}

     bool runOnModule(Module &M);
   };
 }

 char BlockExtractorPass::ID = 0;
-static RegisterPass<BlockExtractorPass>
-XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)");
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+                "Extract Basic Blocks From Module (for bugpoint use)",
+                false, false);

 // createBlockExtractorPass - This pass extracts all blocks (except those
 // specified in the argument list) from the functions in the module.
 //
-ModulePass *llvm::createBlockExtractorPass(const std::vector<BasicBlock*> &BTNE)
+ModulePass *llvm::createBlockExtractorPass()
 {
-  return new BlockExtractorPass(BTNE);
+  return new BlockExtractorPass();
 }

 void BlockExtractorPass::LoadFile(const char *Filename) {
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
index 76cfef8..6c715de 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -109,7 +109,7 @@ namespace {
     bool IsTransformableFunction(StringRef Name);
   public:
     static char ID; // Pass identification, replacement for typeid
-    LowerSetJmp() : ModulePass(&ID) {}
+    LowerSetJmp() : ModulePass(ID) {}

     void visitCallInst(CallInst& CI);
     void visitInvokeInst(InvokeInst& II);
@@ -122,7 +122,7 @@ namespace {
 } // end anonymous namespace

 char LowerSetJmp::ID = 0;
-static RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump");
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);

 // run - Run the transformation on the program. We grab the function
 // prototypes for longjmp and setjmp. If they are used in the program,
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index aeeafe7..5d838f9 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -29,44 +29,27 @@
 //
 // Many functions have their address taken by the virtual function table for
 // the object they belong to. However, as long as it's only used for a lookup
-// and call, this is irrelevant, and we'd like to fold such implementations.
+// and call, this is irrelevant, and we'd like to fold such functions.
 //
-// * use SCC to cut down on pair-wise comparisons and solve larger cycles.
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+//   bucket.
 //
-// The current implementation loops over a pair-wise comparison of all
-// functions in the program where the two functions in the pair are treated as
-// assumed to be equal until proven otherwise. We could both use fewer
-// comparisons and optimize more complex cases if we used strongly connected
-// components of the call graph.
-//
-// * be smarter about bitcast.
+// * be smarter about bitcasts.
 //
 // In order to fold functions, we will sometimes add either bitcast instructions
 // or bitcast constant expressions. Unfortunately, this can confound further
 // analysis since the two functions differ where one has a bitcast and the
-// other doesn't.  We should learn to peer through bitcasts without imposing bad
-// performance properties.
-//
-// * don't emit aliases for Mach-O.
-//
-// Mach-O doesn't support aliases which means that we must avoid introducing
-// them in the bitcode on architectures which don't support them, such as
-// Mac OSX. There's a few approaches to this problem;
-//   a) teach codegen to lower global aliases to thunks on platforms which don't
-//      support them.
-//   b) always emit thunks, and create a separate thunk-to-alias pass which
-//      runs on ELF systems. This has the added benefit of transforming other
-//      thunks such as those produced by a C++ frontend into aliases when legal
-//      to do so.
+// other doesn't. We should learn to look through bitcasts.
 //
 //===----------------------------------------------------------------------===//

 #define DEBUG_TYPE "mergefunc"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Constants.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
@@ -76,68 +59,103 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
-#include <map>
 #include <vector>
 using namespace llvm;

 STATISTIC(NumFunctionsMerged, "Number of functions merged");

 namespace {
+  /// MergeFunctions finds functions which will generate identical machine code,
+  /// by considering all pointer types to be equivalent. Once identified,
+  /// MergeFunctions will fold them by replacing a call to one to a call to a
+  /// bitcast of the other.
+  ///
   class MergeFunctions : public ModulePass {
   public:
-    static char ID; // Pass identification, replacement for typeid
-    MergeFunctions() : ModulePass(&ID) {}
+    static char ID;
+    MergeFunctions() : ModulePass(ID) {}

     bool runOnModule(Module &M);

   private:
-    bool isEquivalentGEP(const GetElementPtrInst *GEP1,
-                         const GetElementPtrInst *GEP2);
-
-    bool equals(const BasicBlock *BB1, const BasicBlock *BB2);
-    bool equals(const Function *F, const Function *G);
+    /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
+    /// may be deleted, or may be converted into a thunk. In either case, it
+    /// should never be visited again.
+    void MergeTwoFunctions(Function *F, Function *G) const;

-    bool compare(const Value *V1, const Value *V2);
+    /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
+    /// replace direct uses of G with bitcast(F).
+    void WriteThunk(Function *F, Function *G) const;

-    const Function *LHS, *RHS;
-    typedef DenseMap<const Value *, unsigned long> IDMap;
-    IDMap Map;
-    DenseMap<const Function *, IDMap> Domains;
-    DenseMap<const Function *, unsigned long> DomainCount;
     TargetData *TD;
   };
 }

 char MergeFunctions::ID = 0;
-static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions");
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);

 ModulePass *llvm::createMergeFunctionsPass() {
   return new MergeFunctions();
 }
-// ===----------------------------------------------------------------------===
-// Comparison of functions
-// ===----------------------------------------------------------------------===
+namespace {
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. TargetData is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+  FunctionComparator(const TargetData *TD, const Function *F1,
+                     const Function *F2)
+    : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+
+  /// Compare - test whether the two functions have equivalent behaviour.
+  bool Compare();
+
+private:
+  /// Compare - test whether two basic blocks have equivalent behaviour.
+  bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+  /// Enumerate - Assign or look up previously assigned numbers for the two
+  /// values, and return whether the numbers are equal. Numbers are assigned in
+  /// the order visited.
+  bool Enumerate(const Value *V1, const Value *V2);
+
+  /// isEquivalentOperation - Compare two Instructions for equivalence, similar
+  /// to Instruction::isSameOperationAs but with modifications to the type
+  /// comparison.
+  bool isEquivalentOperation(const Instruction *I1,
+                             const Instruction *I2) const;
+
+  /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+  bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+  bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+                       const GetElementPtrInst *GEP2) {
+    return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+  }
-static unsigned long hash(const Function *F) {
-  const FunctionType *FTy = F->getFunctionType();
+  /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+  bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
-  FoldingSetNodeID ID;
-  ID.AddInteger(F->size());
-  ID.AddInteger(F->getCallingConv());
-  ID.AddBoolean(F->hasGC());
-  ID.AddBoolean(FTy->isVarArg());
-  ID.AddInteger(FTy->getReturnType()->getTypeID());
-  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
-    ID.AddInteger(FTy->getParamType(i)->getTypeID());
-  return ID.ComputeHash();
+  // The two functions undergoing comparison.
+  const Function *F1, *F2;
+
+  const TargetData *TD;
+
+  typedef DenseMap<const Value *, unsigned long> IDMap;
+  IDMap Map1, Map2;
+  unsigned long IDMap1Count, IDMap2Count;
+};
 }
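Within this file the comparator is driven roughly as follows (a fragment-level sketch of the same translation unit; sameBehaviour is a hypothetical wrapper, and TD may be null when no TargetData analysis is available):

    // A fresh comparator is constructed per candidate pair; it is cheap to
    // build because the ID maps start empty and grow only as values are seen.
    static bool sameBehaviour(const TargetData *TD,
                              const Function *F1, const Function *F2) {
      return FunctionComparator(TD, F1, F2).Compare();
    }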
-/// isEquivalentType - any two pointers are equivalent. Otherwise, standard
-/// type equivalence rules apply.
-static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
+/// isEquivalentType - any two pointers in the same address space are
+/// equivalent. Otherwise, standard type equivalence rules apply.
+bool FunctionComparator::isEquivalentType(const Type *Ty1,
+                                          const Type *Ty2) const {
   if (Ty1 == Ty2)
     return true;
   if (Ty1->getTypeID() != Ty2->getTypeID())
@@ -184,21 +202,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
     return true;
   }

-  case Type::UnionTyID: {
-    const UnionType *UTy1 = cast<UnionType>(Ty1);
-    const UnionType *UTy2 = cast<UnionType>(Ty2);
-
-    // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc.
-    if (UTy1->getNumElements() != UTy2->getNumElements())
-      return false;
-
-    for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) {
-      if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i)))
-        return false;
-    }
-    return true;
-  }
-
   case Type::FunctionTyID: {
     const FunctionType *FTy1 = cast<FunctionType>(Ty1);
     const FunctionType *FTy2 = cast<FunctionType>(Ty2);
@@ -216,11 +219,18 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
     return true;
   }

-  case Type::ArrayTyID:
+  case Type::ArrayTyID: {
+    const ArrayType *ATy1 = cast<ArrayType>(Ty1);
+    const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+    return ATy1->getNumElements() == ATy2->getNumElements() &&
+           isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+  }
+
   case Type::VectorTyID: {
-    const SequentialType *STy1 = cast<SequentialType>(Ty1);
-    const SequentialType *STy2 = cast<SequentialType>(Ty2);
-    return isEquivalentType(STy1->getElementType(), STy2->getElementType());
+    const VectorType *VTy1 = cast<VectorType>(Ty1);
+    const VectorType *VTy2 = cast<VectorType>(Ty2);
+    return VTy1->getNumElements() == VTy2->getNumElements() &&
+           isEquivalentType(VTy1->getElementType(), VTy2->getElementType());
   }
   }
 }

@@ -228,8 +238,8 @@
 /// isEquivalentOperation - determine whether the two operations are the same
 /// except that pointer-to-A and pointer-to-B are equivalent. This should be
 /// kept in sync with Instruction::isSameOperationAs.
-static bool
-isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+                                               const Instruction *I2) const {
   if (I1->getOpcode() != I2->getOpcode() ||
       I1->getNumOperands() != I2->getNumOperands() ||
       !isEquivalentType(I1->getType(), I2->getType()) ||
@@ -281,18 +291,15 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
   return true;
 }

-bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
-                                     const GetElementPtrInst *GEP2) {
+/// isEquivalentGEP - determine whether two GEP operations perform the same
+/// underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+                                         const GEPOperator *GEP2) {
+  // When we have target data, we can reduce the GEP down to the value in bytes
+  // added to the address.
   if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
-    SmallVector<Value *, 8> Indices1, Indices2;
-    for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(),
-           E = GEP1->idx_end(); I != E; ++I) {
-      Indices1.push_back(*I);
-    }
-    for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(),
-           E = GEP2->idx_end(); I != E; ++I) {
-      Indices2.push_back(*I);
-    }
+    SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
+    SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
     uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
                                             Indices1.data(), Indices1.size());
     uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
@@ -300,7 +307,6 @@ bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
     return Offset1 == Offset2;
   }

-  // Equivalent types aren't enough.
   if (GEP1->getPointerOperand()->getType() !=
       GEP2->getPointerOperand()->getType())
     return false;
@@ -309,19 +315,26 @@ bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
     return false;

   for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
-    if (!compare(GEP1->getOperand(i), GEP2->getOperand(i)))
+    if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
       return false;
   }

   return true;
 }

-bool MergeFunctions::compare(const Value *V1, const Value *V2) {
-  if (V1 == LHS || V1 == RHS)
-    if (V2 == LHS || V2 == RHS)
-      return true;
+/// Enumerate - Compare two values used by the two functions under pair-wise
+/// comparison. If this is the first time the values are seen, they're added to
+/// the mapping so that we will detect mismatches on next use.
+bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+  // Check for function @f1 referring to itself and function @f2 referring to
+  // itself, or referring to each other, or both referring to either of them.
+  // They're all equivalent if the two functions are otherwise equivalent.
+  if (V1 == F1 && V2 == F2)
+    return true;
+  if (V1 == F2 && V2 == F1)
+    return true;

-  // TODO: constant expressions in terms of LHS and RHS
+  // TODO: constant expressions with GEP or references to F1 or F2.
   if (isa<Constant>(V1))
     return V1 == V2;

@@ -332,228 +345,138 @@ bool MergeFunctions::compare(const Value *V1, const Value *V2) {
       IA1->getConstraintString() == IA2->getConstraintString();
   }

-  // We enumerate constants globally and arguments, basic blocks or
-  // instructions within the function they belong to.
-  const Function *Domain1 = NULL;
-  if (const Argument *A = dyn_cast<Argument>(V1)) {
-    Domain1 = A->getParent();
-  } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) {
-    Domain1 = BB->getParent();
-  } else if (const Instruction *I = dyn_cast<Instruction>(V1)) {
-    Domain1 = I->getParent()->getParent();
-  }
-
-  const Function *Domain2 = NULL;
-  if (const Argument *A = dyn_cast<Argument>(V2)) {
-    Domain2 = A->getParent();
-  } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) {
-    Domain2 = BB->getParent();
-  } else if (const Instruction *I = dyn_cast<Instruction>(V2)) {
-    Domain2 = I->getParent()->getParent();
-  }
-
-  if (Domain1 != Domain2)
-    if (Domain1 != LHS && Domain1 != RHS)
-      if (Domain2 != LHS && Domain2 != RHS)
-        return false;
-
-  IDMap &Map1 = Domains[Domain1];
   unsigned long &ID1 = Map1[V1];
   if (!ID1)
-    ID1 = ++DomainCount[Domain1];
+    ID1 = ++IDMap1Count;

-  IDMap &Map2 = Domains[Domain2];
   unsigned long &ID2 = Map2[V2];
   if (!ID2)
-    ID2 = ++DomainCount[Domain2];
+    ID2 = ++IDMap2Count;

   return ID1 == ID2;
 }
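Enumerate's numbering scheme is worth seeing in isolation: each side numbers values in first-visit order with its own counter, and two value streams are isomorphic exactly when the assigned numbers always agree. A standalone sketch of the technique, with ints standing in for Value*:

    #include <map>

    // One numbering per side: assign the next number on first sight.
    struct SideNumbering {
      std::map<int, unsigned long> Map;
      unsigned long Count;
      SideNumbering() : Count(0) {}
      unsigned long idFor(int V) {
        unsigned long &ID = Map[V];   // value-initialized to 0 on first access
        if (!ID) ID = ++Count;        // first sight: assign the next number
        return ID;
      }
    };

    // True iff V1 and V2 occupy the same position in their respective
    // first-visit orders; a mismatch is detected on the next shared use.
    static bool enumerateMatch(SideNumbering &S1, SideNumbering &S2,
                               int V1, int V2) {
      return S1.idFor(V1) == S2.idFor(V2);
    }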
-bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) {
-  BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
-  BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+/// Compare - test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+  BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+  BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();

   do {
-    if (!compare(FI, GI))
+    if (!Enumerate(F1I, F2I))
       return false;

-    if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) {
-      const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI);
-      const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI);
+    if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+      const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+      if (!GEP2)
+        return false;

-      if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+      if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
         return false;

       if (!isEquivalentGEP(GEP1, GEP2))
         return false;
     } else {
-      if (!isEquivalentOperation(FI, GI))
+      if (!isEquivalentOperation(F1I, F2I))
         return false;

-      for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
-        Value *OpF = FI->getOperand(i);
-        Value *OpG = GI->getOperand(i);
+      assert(F1I->getNumOperands() == F2I->getNumOperands());
+      for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+        Value *OpF1 = F1I->getOperand(i);
+        Value *OpF2 = F2I->getOperand(i);

-        if (!compare(OpF, OpG))
+        if (!Enumerate(OpF1, OpF2))
           return false;

-        if (OpF->getValueID() != OpG->getValueID() ||
-            !isEquivalentType(OpF->getType(), OpG->getType()))
+        if (OpF1->getValueID() != OpF2->getValueID() ||
+            !isEquivalentType(OpF1->getType(), OpF2->getType()))
           return false;
       }
     }

-    ++FI, ++GI;
-  } while (FI != FE && GI != GE);
+    ++F1I, ++F2I;
+  } while (F1I != F1E && F2I != F2E);

-  return FI == FE && GI == GE;
+  return F1I == F1E && F2I == F2E;
 }

-bool MergeFunctions::equals(const Function *F, const Function *G) {
+/// Compare - test whether the two functions have equivalent behaviour.
+bool FunctionComparator::Compare() {
   // We need to recheck everything, but check the things that weren't included
   // in the hash first.
-  if (F->getAttributes() != G->getAttributes())
+  if (F1->getAttributes() != F2->getAttributes())
     return false;

-  if (F->hasGC() != G->hasGC())
+  if (F1->hasGC() != F2->hasGC())
     return false;

-  if (F->hasGC() && F->getGC() != G->getGC())
+  if (F1->hasGC() && F1->getGC() != F2->getGC())
     return false;

-  if (F->hasSection() != G->hasSection())
+  if (F1->hasSection() != F2->hasSection())
     return false;

-  if (F->hasSection() && F->getSection() != G->getSection())
+  if (F1->hasSection() && F1->getSection() != F2->getSection())
     return false;

-  if (F->isVarArg() != G->isVarArg())
+  if (F1->isVarArg() != F2->isVarArg())
     return false;

   // TODO: if it's internal and only used in direct calls, we could handle this
   // case too.
-  if (F->getCallingConv() != G->getCallingConv())
+  if (F1->getCallingConv() != F2->getCallingConv())
     return false;

-  if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
+  if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
     return false;

-  assert(F->arg_size() == G->arg_size() &&
+  assert(F1->arg_size() == F2->arg_size() &&
          "Identical functions have a different number of args.");

-  LHS = F;
-  RHS = G;
-
   // Visit the arguments so that they get enumerated in the order they're
   // passed in.
-  for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
-         fe = F->arg_end(); fi != fe; ++fi, ++gi) {
-    if (!compare(fi, gi))
+  for (Function::const_arg_iterator f1i = F1->arg_begin(),
+         f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+    if (!Enumerate(f1i, f2i))
       llvm_unreachable("Arguments repeat");
   }

-  SmallVector<const BasicBlock *, 8> FBBs, GBBs;
-  SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F.
-  FBBs.push_back(&F->getEntryBlock());
-  GBBs.push_back(&G->getEntryBlock());
-  VisitedBBs.insert(FBBs[0]);
-  while (!FBBs.empty()) {
-    const BasicBlock *FBB = FBBs.pop_back_val();
-    const BasicBlock *GBB = GBBs.pop_back_val();
-    if (!compare(FBB, GBB) || !equals(FBB, GBB)) {
-      Domains.clear();
-      DomainCount.clear();
-      return false;
-    }
-    const TerminatorInst *FTI = FBB->getTerminator();
-    const TerminatorInst *GTI = GBB->getTerminator();
-    assert(FTI->getNumSuccessors() == GTI->getNumSuccessors());
-    for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) {
-      if (!VisitedBBs.insert(FTI->getSuccessor(i)))
-        continue;
-      FBBs.push_back(FTI->getSuccessor(i));
-      GBBs.push_back(GTI->getSuccessor(i));
-    }
-  }
+  // We do a CFG-ordered walk since the actual ordering of the blocks in the
+  // linked list is immaterial. Our walk starts at the entry block for both
+  // functions, then takes each block from each terminator in order. As an
+  // artifact, this also means that unreachable blocks are ignored.
+  SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+  SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
-  Domains.clear();
-  DomainCount.clear();
-  return true;
-}
+  F1BBs.push_back(&F1->getEntryBlock());
+  F2BBs.push_back(&F2->getEntryBlock());

-// ===----------------------------------------------------------------------===
-// Folding of functions
-// ===----------------------------------------------------------------------===
-
-// Cases:
-// * F is external strong, G is external strong:
-//   turn G into a thunk to F    (1)
-// * F is external strong, G is external weak:
-//   turn G into a thunk to F    (1)
-// * F is external weak, G is external weak:
-//   unfoldable
-// * F is external strong, G is internal:
-//   address of G taken:
-//     turn G into a thunk to F  (1)
-//   address of G not taken:
-//     make G an alias to F      (2)
-// * F is internal, G is external weak
-//   address of F is taken:
-//     turn G into a thunk to F  (1)
-//   address of F is not taken:
-//     make G an alias of F      (2)
-// * F is internal, G is internal:
-//   address of F and G are taken:
-//     turn G into a thunk to F  (1)
-//   address of G is not taken:
-//     make G an alias to F      (2)
-//
-// alias requires linkage == (external,local,weak) fallback to creating a thunk
-// external means 'externally visible' linkage != (internal,private)
-// internal means linkage == (internal,private)
-// weak means linkage mayBeOverridable
-// being external implies that the address is taken
-//
-// 1. turn G into a thunk to F
-// 2. make G an alias to F
+  VisitedBBs.insert(F1BBs[0]);
+  while (!F1BBs.empty()) {
+    const BasicBlock *F1BB = F1BBs.pop_back_val();
+    const BasicBlock *F2BB = F2BBs.pop_back_val();

-enum LinkageCategory {
-  ExternalStrong,
-  ExternalWeak,
-  Internal
-};
+    if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+      return false;

-static LinkageCategory categorize(const Function *F) {
-  switch (F->getLinkage()) {
-  case GlobalValue::InternalLinkage:
-  case GlobalValue::PrivateLinkage:
-  case GlobalValue::LinkerPrivateLinkage:
-    return Internal;
-
-  case GlobalValue::WeakAnyLinkage:
-  case GlobalValue::WeakODRLinkage:
-  case GlobalValue::ExternalWeakLinkage:
-  case GlobalValue::LinkerPrivateWeakLinkage:
-    return ExternalWeak;
-
-  case GlobalValue::ExternalLinkage:
-  case GlobalValue::AvailableExternallyLinkage:
-  case GlobalValue::LinkOnceAnyLinkage:
-  case GlobalValue::LinkOnceODRLinkage:
-  case GlobalValue::AppendingLinkage:
-  case GlobalValue::DLLImportLinkage:
-  case GlobalValue::DLLExportLinkage:
-  case GlobalValue::CommonLinkage:
-    return ExternalStrong;
-  }
+    const TerminatorInst *F1TI = F1BB->getTerminator();
+    const TerminatorInst *F2TI = F2BB->getTerminator();

-  llvm_unreachable("Unknown LinkageType.");
-  return ExternalWeak;
+    assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+    for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+      if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+        continue;
+
+      F1BBs.push_back(F1TI->getSuccessor(i));
+      F2BBs.push_back(F2TI->getSuccessor(i));
+    }
+  }
+  return true;
 }

-static void ThunkGToF(Function *F, Function *G) {
+/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
+/// direct uses of G with bitcast(F).
+void MergeFunctions::WriteThunk(Function *F, Function *G) const {
   if (!G->mayBeOverridden()) {
     // Redirect direct callers of G to F.
     Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
@@ -567,33 +490,34 @@ static void ThunkGToF(Function *F, Function *G) {
     }
   }

+  // If G was internal then we may have replaced all uses if G with F. If so,
+  // stop here and delete G. There's no need for a thunk.
+  if (G->hasLocalLinkage() && G->use_empty()) {
+    G->eraseFromParent();
+    return;
+  }
+
   Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
                                     G->getParent());
   BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+  IRBuilder<false> Builder(BB);

   SmallVector<Value *, 16> Args;
   unsigned i = 0;
   const FunctionType *FFTy = F->getFunctionType();
   for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
        AI != AE; ++AI) {
-    if (FFTy->getParamType(i) == AI->getType()) {
-      Args.push_back(AI);
-    } else {
-      Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB));
-    }
+    Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
     ++i;
   }

-  CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
+  CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
   CI->setTailCall();
   CI->setCallingConv(F->getCallingConv());
   if (NewG->getReturnType()->isVoidTy()) {
-    ReturnInst::Create(F->getContext(), BB);
-  } else if (CI->getType() != NewG->getReturnType()) {
-    Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
-    ReturnInst::Create(F->getContext(), BCI, BB);
+    Builder.CreateRetVoid();
   } else {
-    ReturnInst::Create(F->getContext(), CI, BB);
+    Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
   }

   NewG->copyAttributesFrom(G);
@@ -602,152 +526,126 @@
   G->eraseFromParent();
 }

-static void AliasGToF(Function *F, Function *G) {
-  // Darwin will trigger llvm_unreachable if asked to codegen an alias.
-  return ThunkGToF(F, G);
-
-#if 0
-  if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
-    return ThunkGToF(F, G);
-
-  GlobalAlias *GA = new GlobalAlias(
-    G->getType(), G->getLinkage(), "",
-    ConstantExpr::getBitCast(F, G->getType()), G->getParent());
-  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-  GA->takeName(G);
-  GA->setVisibility(G->getVisibility());
-  G->replaceAllUsesWith(GA);
-  G->eraseFromParent();
-#endif
-}
-
-static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
-  Function *F = FnVec[i];
-  Function *G = FnVec[j];
-
-  LinkageCategory catF = categorize(F);
-  LinkageCategory catG = categorize(G);
-
-  if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) {
-    std::swap(FnVec[i], FnVec[j]);
-    std::swap(F, G);
-    std::swap(catF, catG);
-  }
-
-  switch (catF) {
-  case ExternalStrong:
-    switch (catG) {
-    case ExternalStrong:
-    case ExternalWeak:
-      ThunkGToF(F, G);
-      break;
-    case Internal:
-      if (G->hasAddressTaken())
-        ThunkGToF(F, G);
-      else
-        AliasGToF(F, G);
-      break;
-    }
-    break;
-
-  case ExternalWeak: {
-    assert(catG == ExternalWeak);
+/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
+/// Function G is deleted.
+void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
+  if (F->isWeakForLinker()) {
+    assert(G->isWeakForLinker());

     // Make them both thunks to the same internal function.
-    F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
     Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
                                    F->getParent());
     H->copyAttributesFrom(F);
     H->takeName(F);
     F->replaceAllUsesWith(H);

-    ThunkGToF(F, G);
-    ThunkGToF(F, H);
+    unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());

-    F->setLinkage(GlobalValue::InternalLinkage);
-  } break;
-
-  case Internal:
-    switch (catG) {
-      case ExternalStrong:
-        llvm_unreachable(0);
-        // fall-through
-      case ExternalWeak:
-        if (F->hasAddressTaken())
-          ThunkGToF(F, G);
-        else
-          AliasGToF(F, G);
-        break;
-      case Internal: {
-        bool addrTakenF = F->hasAddressTaken();
-        bool addrTakenG = G->hasAddressTaken();
-        if (!addrTakenF && addrTakenG) {
-          std::swap(FnVec[i], FnVec[j]);
-          std::swap(F, G);
-          std::swap(addrTakenF, addrTakenG);
-        }
+    WriteThunk(F, G);
+    WriteThunk(F, H);

-        if (addrTakenF && addrTakenG) {
-          ThunkGToF(F, G);
-        } else {
-          assert(!addrTakenG);
-          AliasGToF(F, G);
-        }
-      } break;
-    } break;
+    F->setAlignment(MaxAlignment);
+    F->setLinkage(GlobalValue::InternalLinkage);
+  } else {
+    WriteThunk(F, G);
   }

   ++NumFunctionsMerged;
-  return true;
 }

-// ===----------------------------------------------------------------------===
-// Pass definition
-// ===----------------------------------------------------------------------===
+static unsigned ProfileFunction(const Function *F) {
+  const FunctionType *FTy = F->getFunctionType();

-bool MergeFunctions::runOnModule(Module &M) {
-  bool Changed = false;
+  FoldingSetNodeID ID;
+  ID.AddInteger(F->size());
+  ID.AddInteger(F->getCallingConv());
+  ID.AddBoolean(F->hasGC());
+  ID.AddBoolean(FTy->isVarArg());
+  ID.AddInteger(FTy->getReturnType()->getTypeID());
+  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+    ID.AddInteger(FTy->getParamType(i)->getTypeID());
+  return ID.ComputeHash();
+}

-  std::map<unsigned long, std::vector<Function *> > FnMap;
+class ComparableFunction {
+public:
+  ComparableFunction(Function *Func, TargetData *TD)
+    : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}

-  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
-    if (F->isDeclaration())
-      continue;
+  AssertingVH<Function> const Func;
+  const unsigned Hash;
+  TargetData * const TD;
+};

-    FnMap[hash(F)].push_back(F);
-  }
+struct MergeFunctionsEqualityInfo {
+  static ComparableFunction *getEmptyKey() {
+    return reinterpret_cast<ComparableFunction*>(0);
+  }
+  static ComparableFunction *getTombstoneKey() {
+    return reinterpret_cast<ComparableFunction*>(-1);
+  }
+  static unsigned getHashValue(const ComparableFunction *CF) {
+    return CF->Hash;
+  }
+  static bool isEqual(const ComparableFunction *LHS,
+                      const ComparableFunction *RHS) {
+    if (LHS == RHS)
+      return true;
+    if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+        RHS == getEmptyKey() || RHS == getTombstoneKey())
+      return false;
+    assert(LHS->TD == RHS->TD && "Comparing functions for different targets");
+    return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare();
+  }
+};
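MergeFunctionsEqualityInfo above supplies the four hooks DenseSet needs for a custom key type: reserved empty/tombstone bit patterns, a cheap hash for bucketing, and an equality predicate that only runs the expensive comparison on real elements. A minimal standalone analogue, assuming llvm/ADT/DenseSet.h; Widget is a made-up type (the 0/-1 sentinels mirror the diff and must never be valid element pointers):

    #include "llvm/ADT/DenseSet.h"

    struct Widget { unsigned Kind; };

    // Bucket by a cheap hash, then run the deep equality only on real
    // elements; the reserved keys must short-circuit first.
    struct WidgetSetInfo {
      static Widget *getEmptyKey() { return reinterpret_cast<Widget*>(0); }
      static Widget *getTombstoneKey() { return reinterpret_cast<Widget*>(-1); }
      static unsigned getHashValue(const Widget *W) { return W->Kind; }
      static bool isEqual(const Widget *L, const Widget *R) {
        if (L == R)
          return true;
        if (L == getEmptyKey() || L == getTombstoneKey() ||
            R == getEmptyKey() || R == getTombstoneKey())
          return false;
        return L->Kind == R->Kind;  // stand-in for the deep comparison
      }
    };

    typedef llvm::DenseSet<Widget *, WidgetSetInfo> WidgetSet;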
std::vector<Function *> >::iterator - I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { - std::vector<Function *> &FnVec = I->second; - DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); - - for (int i = 0, e = FnVec.size(); i != e; ++i) { - for (int j = i + 1; j != e; ++j) { - bool isEqual = equals(FnVec[i], FnVec[j]); - - DEBUG(dbgs() << " " << FnVec[i]->getName() - << (isEqual ? " == " : " != ") - << FnVec[j]->getName() << "\n"); - - if (isEqual) { - if (fold(FnVec, i, j)) { - LocalChanged = true; - FnVec.erase(FnVec.begin() + j); - --j, --e; - } - } - } - } + FnSetType FnSet; + for (unsigned i = 0, e = Funcs.size(); i != e;) { + Function *F = Funcs[i]; + ComparableFunction *NewF = new ComparableFunction(F, TD); + std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); + if (!Result.second) { + ComparableFunction *&OldF = *Result.first; + assert(OldF && "Expected a hash collision"); + + // NewF will be deleted in favour of OldF unless NewF is strong and + // OldF is weak in which case swap them to keep the strong definition. + + if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker()) + std::swap(OldF, NewF); + + DEBUG(dbgs() << " " << OldF->Func->getName() << " == " + << NewF->Func->getName() << '\n'); + + Funcs.erase(Funcs.begin() + i); + --e; + + Function *DeleteF = NewF->Func; + delete NewF; + MergeTwoFunctions(OldF->Func, DeleteF); + LocalChanged = true; + Changed = true; + } else { + ++i; + } } - Changed |= LocalChanged; + DeleteContainerPointers(FnSet); } while (LocalChanged); return Changed; diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp index 6b9814c..432f7c5 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -30,7 +30,7 @@ namespace { struct PartialInliner : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - PartialInliner() : ModulePass(&ID) {} + PartialInliner() : ModulePass(ID) {} bool runOnModule(Module& M); @@ -40,7 +40,8 @@ namespace { } char PartialInliner::ID = 0; -static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner"); +INITIALIZE_PASS(PartialInliner, "partial-inliner", + "Partial Inliner", false, false); ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } @@ -67,7 +68,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { // Clone the function, so that we can hack away on it. 
ValueMap<const Value*, Value*> VMap; - Function* duplicateFunction = CloneFunction(F, VMap); + Function* duplicateFunction = CloneFunction(F, VMap, + /*ModuleLevelChanges=*/false); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(duplicateFunction); BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); @@ -159,7 +161,7 @@ bool PartialInliner::runOnModule(Module& M) { bool recursive = false; for (Function::use_iterator UI = currFunc->use_begin(), UE = currFunc->use_end(); UI != UE; ++UI) - if (Instruction* I = dyn_cast<Instruction>(UI)) + if (Instruction* I = dyn_cast<Instruction>(*UI)) if (I->getParent()->getParent() == currFunc) { recursive = true; break; diff --git a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp index 58e1448..4a99a41 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp @@ -50,14 +50,14 @@ namespace { int scanDistribution(Function&, int, std::map<Constant*, int>&); public : static char ID; // Pass identification, replacement for typeid - PartSpec() : ModulePass(&ID) {} + PartSpec() : ModulePass(ID) {} bool runOnModule(Module &M); }; } char PartSpec::ID = 0; -static RegisterPass<PartSpec> -X("partialspecialization", "Partial Specialization"); +INITIALIZE_PASS(PartSpec, "partialspecialization", + "Partial Specialization", false, false); // Specialize F by replacing the arguments (keys) in replacements with the // constants (values). Replace all calls to F with those constants with @@ -74,7 +74,8 @@ SpecializeFunction(Function* F, deleted[arg->getArgNo()] = arg; } - Function* NF = CloneFunction(F, replacements); + Function* NF = CloneFunction(F, replacements, + /*ModuleLevelChanges=*/false); NF->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(NF); @@ -82,10 +83,10 @@ SpecializeFunction(Function* F, ii != ee; ) { Value::use_iterator i = ii; ++ii; - if (isa<CallInst>(i) || isa<InvokeInst>(i)) { - CallSite CS(cast<Instruction>(i)); + User *U = *i; + CallSite CS(U); + if (CS) { if (CS.getCalledFunction() == F) { - SmallVector<Value*, 6> args; // Assemble the non-specialized arguments for the updated callsite. 
// In the process, make sure that the specialized arguments are @@ -105,13 +106,13 @@ SpecializeFunction(Function* F, } } Value* NCall; - if (CallInst *CI = dyn_cast<CallInst>(i)) { + if (CallInst *CI = dyn_cast<CallInst>(U)) { NCall = CallInst::Create(NF, args.begin(), args.end(), CI->getName(), CI); cast<CallInst>(NCall)->setTailCall(CI->isTailCall()); cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv()); } else { - InvokeInst *II = cast<InvokeInst>(i); + InvokeInst *II = cast<InvokeInst>(U); NCall = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), args.begin(), args.end(), @@ -123,8 +124,7 @@ SpecializeFunction(Function* F, ++numReplaced; } } - next_use: - ; + next_use:; } return NF; } @@ -174,14 +174,14 @@ void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) { ui != ue; ++ui) { bool interesting = false; - - if (isa<CmpInst>(ui)) interesting = true; - else if (isa<CallInst>(ui)) + User *U = *ui; + if (isa<CmpInst>(U)) interesting = true; + else if (isa<CallInst>(U)) interesting = ui->getOperand(0) == ii; - else if (isa<InvokeInst>(ui)) + else if (isa<InvokeInst>(U)) interesting = ui->getOperand(0) == ii; - else if (isa<SwitchInst>(ui)) interesting = true; - else if (isa<BranchInst>(ui)) interesting = true; + else if (isa<SwitchInst>(U)) interesting = true; + else if (isa<BranchInst>(U)) interesting = true; if (interesting) { args.push_back(std::distance(F.arg_begin(), ii)); @@ -196,14 +196,16 @@ int PartSpec::scanDistribution(Function& F, int arg, std::map<Constant*, int>& dist) { bool hasIndirect = false; int total = 0; - for(Value::use_iterator ii = F.use_begin(), ee = F.use_end(); - ii != ee; ++ii) - if ((isa<CallInst>(ii) || isa<InvokeInst>(ii)) - && ii->getOperand(0) == &F) { - ++dist[dyn_cast<Constant>(ii->getOperand(arg + 1))]; + for (Value::use_iterator ii = F.use_begin(), ee = F.use_end(); + ii != ee; ++ii) { + User *U = *ii; + CallSite CS(U); + if (CS && CS.getCalledFunction() == &F) { + ++dist[dyn_cast<Constant>(CS.getArgument(arg))]; ++total; } else hasIndirect = true; + } // Preserve the original address taken function even if all other uses // will be specialized. diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp index de6099c..09ac76f 100644 --- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -37,7 +37,7 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized"); namespace { struct PruneEH : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - PruneEH() : CallGraphSCCPass(&ID) {} + PruneEH() : CallGraphSCCPass(ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. 
bool runOnSCC(CallGraphSCC &SCC); @@ -48,8 +48,8 @@ namespace { } char PruneEH::ID = 0; -static RegisterPass<PruneEH> -X("prune-eh", "Remove unused exception handling info"); +INITIALIZE_PASS(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false); Pass *llvm::createPruneEHPass() { return new PruneEH(); } diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp index 4566a76..ee10ad0 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -29,15 +29,15 @@ namespace { class StripDeadPrototypesPass : public ModulePass { public: static char ID; // Pass identification, replacement for typeid - StripDeadPrototypesPass() : ModulePass(&ID) { } + StripDeadPrototypesPass() : ModulePass(ID) { } virtual bool runOnModule(Module &M); }; } // end anonymous namespace char StripDeadPrototypesPass::ID = 0; -static RegisterPass<StripDeadPrototypesPass> -X("strip-dead-prototypes", "Strip Unused Function Prototypes"); +INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes", + "Strip Unused Function Prototypes", false, false); bool StripDeadPrototypesPass::runOnModule(Module &M) { bool MadeChange = false; diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp index 12e8db8..20b7b8f 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -39,7 +39,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripSymbols(bool ODI = false) - : ModulePass(&ID), OnlyDebugInfo(ODI) {} + : ModulePass(ID), OnlyDebugInfo(ODI) {} virtual bool runOnModule(Module &M); @@ -52,7 +52,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripNonDebugSymbols() - : ModulePass(&ID) {} + : ModulePass(ID) {} virtual bool runOnModule(Module &M); @@ -65,7 +65,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripDebugDeclare() - : ModulePass(&ID) {} + : ModulePass(ID) {} virtual bool runOnModule(Module &M); @@ -78,7 +78,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripDeadDebugInfo() - : ModulePass(&ID) {} + : ModulePass(ID) {} virtual bool runOnModule(Module &M); @@ -89,32 +89,33 @@ namespace { } char StripSymbols::ID = 0; -static RegisterPass<StripSymbols> -X("strip", "Strip all symbols from a module"); +INITIALIZE_PASS(StripSymbols, "strip", + "Strip all symbols from a module", false, false); ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { return new StripSymbols(OnlyDebugInfo); } char StripNonDebugSymbols::ID = 0; -static RegisterPass<StripNonDebugSymbols> -Y("strip-nondebug", "Strip all symbols, except dbg symbols, from a module"); +INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug", + "Strip all symbols, except dbg symbols, from a module", + false, false); ModulePass *llvm::createStripNonDebugSymbolsPass() { return new StripNonDebugSymbols(); } char StripDebugDeclare::ID = 0; -static RegisterPass<StripDebugDeclare> -Z("strip-debug-declare", "Strip all llvm.dbg.declare intrinsics"); +INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare", + "Strip all llvm.dbg.declare intrinsics", false, false); ModulePass *llvm::createStripDebugDeclarePass() { return new StripDebugDeclare(); } char StripDeadDebugInfo::ID = 0; -static 
RegisterPass<StripDeadDebugInfo> -A("strip-dead-debug-info", "Strip debug info for unused symbols"); +INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info", + "Strip debug info for unused symbols", false, false); ModulePass *llvm::createStripDeadDebugInfoPass() { return new StripDeadDebugInfo(); @@ -254,14 +255,15 @@ static bool StripDebugInfo(Module &M) { } } - unsigned MDDbgKind = M.getMDKindID("dbg"); for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { - Changed = true; // FIXME: Only set if there was debug metadata. - BI->setMetadata(MDDbgKind, 0); + if (!BI->getDebugLoc().isUnknown()) { + Changed = true; + BI->setDebugLoc(DebugLoc()); + } } return Changed; @@ -348,8 +350,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(), E = MDs.end(); I != E; ++I) { - if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(), - true)) { + GlobalVariable *GV = DIGlobalVariable(*I).getGlobal(); + if (GV && M.getGlobalVariable(GV->getName(), true)) { if (!NMD) NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(*I); diff --git a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp index a74686f..b82b03f 100644 --- a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp @@ -1,4 +1,4 @@ -//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===// +//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===// // // The LLVM Compiler Infrastructure // @@ -50,20 +50,19 @@ namespace { virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid - SRETPromotion() : CallGraphSCCPass(&ID) {} + SRETPromotion() : CallGraphSCCPass(ID) {} private: CallGraphNode *PromoteReturn(CallGraphNode *CGN); bool isSafeToUpdateAllCallers(Function *F); Function *cloneFunctionBody(Function *F, const StructType *STy); CallGraphNode *updateCallSites(Function *F, Function *NF); - bool nestedStructType(const StructType *STy); }; } char SRETPromotion::ID = 0; -static RegisterPass<SRETPromotion> -X("sretpromotion", "Promote sret arguments to multiple ret values"); +INITIALIZE_PASS(SRETPromotion, "sretpromotion", + "Promote sret arguments to multiple ret values", false, false); Pass *llvm::createStructRetPromotionPass() { return new SRETPromotion(); @@ -156,7 +155,7 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { FnUseI != FnUseE; ++FnUseI) { // The function is passed in as an argument to (possibly) another function, // we can't change it! - CallSite CS = CallSite::get(*FnUseI); + CallSite CS(*FnUseI); Instruction *Call = CS.getInstruction(); // The function is used by something else than a call or invoke instruction, // we can't change it! 
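// Illustrative sketch (editorial; hypothetical code, not from this patch): the check
// above rejects any use of the function that is not a direct call, for example taking
// its address, since a rewritten signature would break such a use.
int callee(int *out);            // candidate for the sret rewrite
int (*escaped)(int *) = callee;  // address-taken use: not a call site, so
                                 // updating all callers is impossible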
@@ -187,7 +186,7 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { return false; for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end(); GEPI != GEPE; ++GEPI) - if (!isa<LoadInst>(GEPI)) + if (!isa<LoadInst>(*GEPI)) return false; } // Any other FirstArg users make this function unsuitable for sret @@ -271,7 +270,7 @@ CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); while (!F->use_empty()) { - CallSite CS = CallSite::get(*F->use_begin()); + CallSite CS(*F->use_begin()); Instruction *Call = CS.getInstruction(); const AttrListPtr &PAL = F->getAttributes(); @@ -351,14 +350,3 @@ CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { return NF_CGN; } -/// nestedStructType - Return true if STy includes any -/// other aggregate types -bool SRETPromotion::nestedStructType(const StructType *STy) { - unsigned Num = STy->getNumElements(); - for (unsigned i = 0; i < Num; i++) { - const Type *Ty = STy->getElementType(i); - if (!Ty->isSingleValueType() && !Ty->isVoidTy()) - return true; - } - return false; -} diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h index 24e0528..6f9609c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -81,7 +81,7 @@ public: BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} + InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {} public: virtual bool runOnFunction(Function &F); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 5876f40..19a05bf 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -474,19 +474,16 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) --> - // (icmp eq (A & (C1|C2)), (C1|C2)) + // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Instruction *I1 = dyn_cast<Instruction>(Val); - Instruction *I2 = dyn_cast<Instruction>(Val2); - if (I1 && I1->getOpcode() == Instruction::And && - I2 && I2->getOpcode() == Instruction::And && - I1->getOperand(0) == I1->getOperand(0)) { - ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1)); - ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1)); - if (CI1 && !CI1->isZero() && CI2 && !CI2->isZero() && - CI1->getValue().operator&(CI2->getValue()) == 0) { + Value *Op1 = 0, *Op2 = 0; + ConstantInt *CI1 = 0, *CI2 = 0; + if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && + match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { + if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && + CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); - Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr); + Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr); } } @@ -1170,11 +1167,28 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); if (LHSCst == 0 || RHSCst == 0) return 0; - // 
(icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) - if (LHSCst == RHSCst && LHSCC == RHSCC && - LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + if (LHSCst == RHSCst && LHSCC == RHSCC) { + // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) + if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + + // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) --> + // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT + if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { + Value *Op1 = 0, *Op2 = 0; + ConstantInt *CI1 = 0, *CI2 = 0; + if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && + match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { + if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && + CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { + Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); + Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); + return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr); + } + } + } } // From here on, we only handle: diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 85251a8..0ebe3b4 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -96,14 +96,23 @@ static unsigned EnforceKnownAlignment(Value *V, /// increase the alignment of the ultimate object, making this check succeed. unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, unsigned PrefAlign) { - unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : - sizeof(PrefAlign) * CHAR_BIT; + assert(V->getType()->isPointerTy() && + "GetOrEnforceKnownAlignment expects a pointer!"); + unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(V, Mask, KnownZero, KnownOne); unsigned TrailZ = KnownZero.countTrailingOnes(); + + // Avoid trouble with ridiculously large TrailZ values, such as + // those computed from a null pointer. + TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + // LLVM doesn't support alignments larger than this currently.
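// Illustrative sketch (editorial; standalone, hypothetical helper): the alignment is
// derived from the number of low bits known to be zero, and the clamp matters because
// a null pointer has all bits known zero and "1u << 32" would be undefined behaviour.
#include <algorithm> // assumed, for std::min
#include <climits>   // assumed, for CHAR_BIT
unsigned alignFromKnownZeroLowBits(unsigned TrailZ) {
  TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
  return 1u << TrailZ; // largest power of two guaranteed to divide the value
}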
+ Align = std::min(Align, +Value::MaximumAlignment); + if (PrefAlign > Align) Align = EnforceKnownAlignment(V, Align, PrefAlign); @@ -529,7 +538,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X + 0 -> {X, false} if (RHS->isZero()) { Constant *V[] = { - UndefValue::get(II->getCalledValue()->getType()), + UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); @@ -630,8 +639,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); APInt DemandedElts(VWidth, 1); APInt UndefElts(VWidth, 0); - if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, - UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), + DemandedElts, UndefElts)) { II->setArgOperand(0, V); return II; } @@ -655,8 +664,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType()); + Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), + Mask->getType()); + Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), + Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. @@ -772,13 +783,15 @@ protected: NewInstruction = IC->ReplaceInstUsesWith(*CI, With); } bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) { + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { if (SizeCI->isAllOnesValue()) return true; if (isString) return SizeCI->getZExtValue() >= - GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)); - if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset))) + GetStringLength(CI->getArgOperand(SizeArgOp)); + if (ConstantInt *Arg = dyn_cast<ConstantInt>( + CI->getArgOperand(SizeArgOp))) return SizeCI->getZExtValue() >= Arg->getZExtValue(); } return false; @@ -1140,7 +1153,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { IntrinsicInst *Tramp = cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); - Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); + Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 505a0bf..79a9b09 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -396,6 +396,11 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) { case Instruction::Trunc: // trunc(trunc(x)) -> trunc(x) return true; + case Instruction::ZExt: + case Instruction::SExt: + // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest + // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest + return true; case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); return CanEvaluateTruncated(SI->getTrueValue(), 
Ty) && @@ -454,6 +459,29 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } + + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. + Value *A = 0; ConstantInt *Cst = 0; + if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) && + Src->hasOneUse()) { + // We have three types to worry about here, the type of A, the source of + // the truncate (MidSize), and the destination of the truncate. We know that + // ASize < MidSize and MidSize > ResultSize, but don't know the relation + // between ASize and ResultSize. + unsigned ASize = A->getType()->getPrimitiveSizeInBits(); + + // If the shift amount is larger than the size of A, then the result is + // known to be zero because all the input bits got shifted out. + if (Cst->getZExtValue() >= ASize) + return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType())); + + // Since we're doing an lshr and a zero extend, and know that the shift + // amount is smaller than ASize, it is always safe to do the shift in A's + // type, then zero extend or truncate to the result. + Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), false); + } return 0; } @@ -538,8 +566,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); - else - return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); + return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); } } } @@ -1097,6 +1124,38 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { break; } } + + // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) + // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it. + CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); + if (Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "sqrt" && + Call->getNumArgOperands() == 1) { + CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); + if (Arg && Arg->getOpcode() == Instruction::FPExt && + CI.getType()->isFloatTy() && + Call->getType()->isDoubleTy() && + Arg->getType()->isDoubleTy() && + Arg->getOperand(0)->getType()->isFloatTy()) { + Function *Callee = Call->getCalledFunction(); + Module *M = CI.getParent()->getParent()->getParent(); + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", + Callee->getAttributes(), + Builder->getFloatTy(), + Builder->getFloatTy(), + NULL); + CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), + "sqrtfcall"); + ret->setAttributes(Callee->getAttributes()); + + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. + Call->replaceAllUsesWith(UndefValue::get(Call->getType())); + EraseInstFromFunction(*Call); + return ret; + } + } + return 0; } @@ -1308,6 +1367,199 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, return new ShuffleVectorInst(InVal, V2, Mask); } +static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) { + return Value % Ty->getPrimitiveSizeInBits() == 0; +} + +static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) { + return Value / Ty->getPrimitiveSizeInBits(); +} + +/// CollectInsertionElements - V is a value which is inserted into a vector of +/// VecEltTy. Look through the value to see if we can decompose it into +/// insertions into the vector. 
See the example in the comment for +/// OptimizeIntegerToVectorInsertions for the pattern this handles. +/// The type of V is always a non-zero multiple of VecEltTy's size. +/// +/// This returns false if the pattern can't be matched or true if it can, +/// filling in Elements with the elements found here. +static bool CollectInsertionElements(Value *V, unsigned ElementIndex, + SmallVectorImpl<Value*> &Elements, + const Type *VecEltTy) { + // Undef values never contribute useful bits to the result. + if (isa<UndefValue>(V)) return true; + + // If we got down to a value of the right type, we win, try inserting into the + // right element. + if (V->getType() == VecEltTy) { + // Inserting null doesn't actually insert any elements. + if (Constant *C = dyn_cast<Constant>(V)) + if (C->isNullValue()) + return true; + + // Fail if multiple elements are inserted into this slot. + if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) + return false; + + Elements[ElementIndex] = V; + return true; + } + + if (Constant *C = dyn_cast<Constant>(V)) { + // Figure out the # elements this provides, and bitcast it or slice it up + // as required. + unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(), + VecEltTy); + // If the constant is the size of a vector element, we just need to bitcast + // it to the right type so it gets properly inserted. + if (NumElts == 1) + return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), + ElementIndex, Elements, VecEltTy); + + // Okay, this is a constant that covers multiple elements. Slice it up into + // pieces and insert each element-sized piece into the vector. + if (!isa<IntegerType>(C->getType())) + C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(), + C->getType()->getPrimitiveSizeInBits())); + unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); + const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); + + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), + i*ElementSize)); + Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); + if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy)) + return false; + } + return true; + } + + if (!V->hasOneUse()) return false; + + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return false; + switch (I->getOpcode()) { + default: return false; // Unhandled case. + case Instruction::BitCast: + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy); + case Instruction::ZExt: + if (!isMultipleOfTypeSize( + I->getOperand(0)->getType()->getPrimitiveSizeInBits(), + VecEltTy)) + return false; + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy); + case Instruction::Or: + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy) && + CollectInsertionElements(I->getOperand(1), ElementIndex, + Elements, VecEltTy); + case Instruction::Shl: { + // Must be shifting by a constant that is a multiple of the element size. 
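// Illustrative sketch (editorial; standalone, assumes little-endian layout): a shl by
// a multiple of the element size simply moves the inserted value to a higher element
// index, here 32/32 == 1 for a <2 x i32>-like layout.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t lo = 0x11111111u, hi = 0x22222222u;
  uint64_t packed = (uint64_t(hi) << 32) | uint64_t(lo); // the or + shl pattern
  uint32_t elt0 = uint32_t(packed);                      // element index 0 holds lo
  uint32_t elt1 = uint32_t(packed >> 32);                // the shl put hi at index 1
  assert(elt0 == lo && elt1 == hi);
  return 0;
}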
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; + unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); + + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, + Elements, VecEltTy); + } + + } +} + + +/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we +/// may be doing shifts and ors to assemble the elements of the vector manually. +/// Try to rip the code out and replace it with insertelements. This is to +/// optimize code like this: +/// +/// %tmp37 = bitcast float %inc to i32 +/// %tmp38 = zext i32 %tmp37 to i64 +/// %tmp31 = bitcast float %inc5 to i32 +/// %tmp32 = zext i32 %tmp31 to i64 +/// %tmp33 = shl i64 %tmp32, 32 +/// %ins35 = or i64 %tmp33, %tmp38 +/// %tmp43 = bitcast i64 %ins35 to <2 x float> +/// +/// Into two insertelements that do "buildvector{%inc, %inc5}". +static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, + InstCombiner &IC) { + const VectorType *DestVecTy = cast<VectorType>(CI.getType()); + Value *IntInput = CI.getOperand(0); + + SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); + if (!CollectInsertionElements(IntInput, 0, Elements, + DestVecTy->getElementType())) + return 0; + + // If we succeeded, we know that all of the elements are specified by Elements + // or are zero if Elements has a null entry. Recast this as a set of + // insertions. + Value *Result = Constant::getNullValue(CI.getType()); + for (unsigned i = 0, e = Elements.size(); i != e; ++i) { + if (Elements[i] == 0) continue; // Unset element. + + Result = IC.Builder->CreateInsertElement(Result, Elements[i], + IC.Builder->getInt32(i)); + } + + return Result; +} + + +/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double +/// bitcast. The various long double bitcasts can't get in here. +static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + Value *Src = CI.getOperand(0); + const Type *DestTy = CI.getType(); + + // If this is a bitcast from int to float, check to see if the int is an + // extraction from a vector. + Value *VecInput = 0; + // bitcast(trunc(bitcast(somevector))) + if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && + isa<VectorType>(VecInput->getType())) { + const VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. + if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + } + } + + // bitcast(trunc(lshr(bitcast(somevector), cst))) + ConstantInt *ShAmt = 0; + if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), + m_ConstantInt(ShAmt)))) && + isa<VectorType>(VecInput->getType())) { + const VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && + ShAmt->getZExtValue() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from.
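// Illustrative sketch (editorial; standalone, assumes little-endian layout): truncating
// the reinterpreted vector bits selects element 0, so the whole
// bitcast(trunc(bitcast(v))) chain is just an element extraction.
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  float v[2] = {1.5f, -2.25f};                        // stands in for a <2 x float>
  uint64_t bits; std::memcpy(&bits, v, sizeof bits);  // bitcast to i64
  uint32_t lo32 = uint32_t(bits);                     // trunc to i32
  float elt0; std::memcpy(&elt0, &lo32, sizeof elt0); // bitcast back to float
  assert(elt0 == v[0]);                               // same as extractelement index 0
  return 0;
}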
+ if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + unsigned Elt = ShAmt->getZExtValue() / DestWidth; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + } + } + return 0; +} Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, @@ -1359,6 +1611,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { ((Instruction*)NULL)); } } + + // Try to optimize int -> float bitcasts. + if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) + if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) + return I; if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { @@ -1368,16 +1625,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - // If this is a cast from an integer to vector, check to see if the input - // is a trunc or zext of a bitcast from vector. If so, we can replace all - // the casts with a shuffle and (potentially) a bitcast. - if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){ - CastInst *SrcCast = cast<CastInst>(Src); - if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) - if (isa<VectorType>(BCIn->getOperand(0)->getType())) - if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + if (isa<IntegerType>(SrcTy)) { + // If this is a cast from an integer to vector, check to see if the input + // is a trunc or zext of a bitcast from vector. If so, we can replace all + // the casts with a shuffle and (potentially) a bitcast. + if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) { + CastInst *SrcCast = cast<CastInst>(Src); + if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) + if (isa<VectorType>(BCIn->getOperand(0)->getType())) + if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), cast<VectorType>(DestTy), *this)) - return I; + return I; + } + + // If the input is an 'or' instruction, we may be doing shifts and ors to + // assemble the elements of the vector manually. Try to rip the code out + // and replace it with insertelements. + if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this)) + return ReplaceInstUsesWith(CI, V); } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6c00586..d7e2b72 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1374,7 +1374,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, case Instruction::Or: // If bits are being or'd in that are not present in the constant we // are comparing against, then the comparison could never succeed! 
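// Illustrative sketch (editorial; hypothetical function): or'ing in a bit that the
// comparison constant lacks makes the equality unsatisfiable.
bool neverTrue(unsigned X) {
  return (X | 4u) == 3u; // bit 2 is forced on, but 3 has it off: always false
}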
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { Constant *NotCI = ConstantExpr::getNot(RHS); if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 8933a0b..b68fbc2 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -146,10 +146,14 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (TD) { unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); - if (KnownAlign > - (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : - LI.getAlignment())) + unsigned LoadAlign = LI.getAlignment(); + unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : + TD->getABITypeAlignment(LI.getType()); + + if (KnownAlign > EffectiveLoadAlign) LI.setAlignment(KnownAlign); + else if (LoadAlign == 0) + LI.setAlignment(EffectiveLoadAlign); } // load (cast X) --> cast (load X) iff safe. @@ -369,7 +373,7 @@ DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U)) return DI; if (isa<BitCastInst>(U) && U->hasOneUse()) { - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U->use_begin())) + if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin())) return DI; } } @@ -411,10 +415,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (TD) { unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); - if (KnownAlign > - (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : - SI.getAlignment())) + unsigned StoreAlign = SI.getAlignment(); + unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : + TD->getABITypeAlignment(Val->getType()); + + if (KnownAlign > EffectiveStoreAlign) SI.setAlignment(KnownAlign); + else if (StoreAlign == 0) + SI.setAlignment(EffectiveStoreAlign); } // Do really simple DSE, to catch cases where there are several consecutive diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index f9ffdb1..c44fe9d 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -699,34 +699,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { SI.setOperand(2, TrueVal); return &SI; } - - // select (A == 0 | B == 0), T, F--> select (A != 0 & B != 0), F, T - // Note: This is a canonicalization rather than an optimization, and is used - // to expose opportunities to other instcombine transforms. 
- Instruction* CondInst = dyn_cast<Instruction>(CondVal); - if (CondInst && CondInst->hasOneUse() && - CondInst->getOpcode() == Instruction::Or) { - ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0)); - ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1)); - if (LHSCmp && LHSCmp->hasOneUse() && - LHSCmp->getPredicate() == ICmpInst::ICMP_EQ && - RHSCmp && RHSCmp->hasOneUse() && - RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) { - ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1)); - ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1)); - if (C1 && C1->isZero() && C2 && C2->isZero()) { - LHSCmp->setPredicate(ICmpInst::ICMP_NE); - RHSCmp->setPredicate(ICmpInst::ICMP_NE); - Value *And = - InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp, - "and."+CondVal->getName()), SI); - SI.setOperand(0, And); - SI.setOperand(1, FalseVal); - SI.setOperand(2, TrueVal); - return &SI; - } - } - } return 0; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index e5ce8a6..27716b8 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -56,10 +56,270 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return 0; } +/// CanEvaluateShifted - See if we can compute the specified value, but shifted +/// logically to the left or right by some number of bits. This should return +/// true if the expression can be computed for the same cost as the current +/// expression tree. This is used to eliminate extraneous shifting from things +/// like: +/// %C = shl i128 %A, 64 +/// %D = shl i128 %B, 96 +/// %E = or i128 %C, %D +/// %F = lshr i128 %E, 64 +/// where the client will ask if E can be computed shifted right by 64-bits. If +/// this succeeds, the GetShiftedValue function will be called to produce the +/// value. +static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, + InstCombiner &IC) { + // We can always evaluate constants shifted. + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // If this is the opposite shift, we can directly reuse the input of the shift + // if the needed bits are already zero in the input. This allows us to reuse + // the value which means that we don't care if the shift has multiple uses. + // TODO: Handle opposite shift by exact value. + ConstantInt *CI; + if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || + (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { + if (CI->getZExtValue() == NumBits) { + // TODO: Check that the input bits are already zero with MaskedValueIsZero +#if 0 + // If this is a truncate of a logical shr, we can truncate it to a smaller + // lshr iff we know that the bits we would otherwise be shifting in are + // already zeros. + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && + CI->getLimitedValue(BitWidth) < BitWidth) { + return CanEvaluateTruncated(I->getOperand(0), Ty); + } +#endif + + } + } + + // We can't mutate something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. 
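// Illustrative sketch (editorial; standalone): with a second use, the intermediate
// value cannot be rewritten in pre-shifted form without changing that other use,
// which is why the fold below only fires on single-use chains.
#include <cstdint>
uint32_t lowBytes(uint32_t a, uint32_t &other) {
  uint32_t t = a << 8; // "t" has two uses below
  other = t;           // rewriting "t" as plain "a" would corrupt this store
  return t >> 8;       // with one use this could fold to a & 0x00FFFFFFu
}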
+ if (!I->hasOneUse()) return false; + + switch (I->getOpcode()) { + default: return false; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Bitwise operators can all be evaluated shifted. + return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) && + CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC); + + case Instruction::Shl: { + // We can often fold the shift into shifts-by-a-constant. + CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). + if (isLeftShift) return true; + + // We can always turn shl(c)+shr(c) -> and(c2). + if (CI->getValue() == NumBits) return true; + + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't + // profitable unless we know the and'd out bits are already zero. + if (CI->getZExtValue() > NumBits) { + unsigned HighBits = CI->getZExtValue() - NumBits; + if (MaskedValueIsZero(I->getOperand(0), + APInt::getHighBitsSet(TypeWidth, HighBits))) + return true; + } + + return false; + } + case Instruction::LShr: { + // We can often fold the shift into shifts-by-a-constant. + CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). + if (!isLeftShift) return true; + + // We can always turn lshr(c)+shl(c) -> and(c2). + if (CI->getValue() == NumBits) return true; + + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't + // profitable unless we know the and'd out bits are already zero. + if (CI->getZExtValue() > NumBits) { + unsigned LowBits = CI->getZExtValue() - NumBits; + if (MaskedValueIsZero(I->getOperand(0), + APInt::getLowBitsSet(TypeWidth, LowBits))) + return true; + } + + return false; + } + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) && + CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC); + } + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC)) + return false; + return true; + } + } +} + +/// GetShiftedValue - When CanEvaluateShifted returned true for an expression, +/// this function inserts the new computation that produces the shifted value. +static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, + InstCombiner &IC) { + // We can always evaluate constants shifted. + if (Constant *C = dyn_cast<Constant>(V)) { + if (isLeftShift) + V = IC.Builder->CreateShl(C, NumBits); + else + V = IC.Builder->CreateLShr(C, NumBits); + // If we got a constantexpr back, try to simplify it with TD info. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + V = ConstantFoldConstantExpression(CE, IC.getTargetData()); + return V; + } + + Instruction *I = cast<Instruction>(V); + IC.Worklist.Add(I); + + switch (I->getOpcode()) { + default: assert(0 && "Inconsistency with CanEvaluateShifted"); + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Bitwise operators can all be evaluated shifted.
+ I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC)); + I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + return I; + + case Instruction::Shl: { + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We only accept shifts-by-a-constant in CanEvaluateShifted. + ConstantInt *CI = cast<ConstantInt>(I->getOperand(1)); + + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). + if (isLeftShift) { + // If this is an oversized composite shift, then unsigned shifts get 0. + unsigned NewShAmt = NumBits+CI->getZExtValue(); + if (NewShAmt >= TypeWidth) + return Constant::getNullValue(I->getType()); + + I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt)); + return I; + } + + // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have + // zeros. + if (CI->getValue() == NumBits) { + APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits)); + V = IC.Builder->CreateAnd(I->getOperand(0), + ConstantInt::get(I->getContext(), Mask)); + if (Instruction *VI = dyn_cast<Instruction>(V)) { + VI->moveBefore(I); + VI->takeName(I); + } + return V; + } + + // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that + // the and won't be needed. + assert(CI->getZExtValue() > NumBits); + I->setOperand(1, ConstantInt::get(I->getType(), + CI->getZExtValue() - NumBits)); + return I; + } + case Instruction::LShr: { + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + // We only accept shifts-by-a-constant in CanEvaluateShifted. + ConstantInt *CI = cast<ConstantInt>(I->getOperand(1)); + + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). + if (!isLeftShift) { + // If this is an oversized composite shift, then unsigned shifts get 0. + unsigned NewShAmt = NumBits+CI->getZExtValue(); + if (NewShAmt >= TypeWidth) + return Constant::getNullValue(I->getType()); + + I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt)); + return I; + } + + // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have + // zeros. + if (CI->getValue() == NumBits) { + APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits)); + V = IC.Builder->CreateAnd(I->getOperand(0), + ConstantInt::get(I->getContext(), Mask)); + if (Instruction *VI = dyn_cast<Instruction>(V)) { + VI->moveBefore(I); + VI->takeName(I); + } + return V; + } + + // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that + // the and won't be needed. + assert(CI->getZExtValue() > NumBits); + I->setOperand(1, ConstantInt::get(I->getType(), + CI->getZExtValue() - NumBits)); + return I; + } + + case Instruction::Select: + I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC)); + return I; + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), + NumBits, isLeftShift, IC)); + return PN; + } + } +} + + + Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; - + + + // See if we can propagate this shift into the input; this covers the trivial + // case of lshr(shl(x,c1),c2) as well as other more complex cases.
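// Illustrative sketch (editorial; standalone brute-force check) of the kind of shift
// propagation this enables: the outer lshr is pushed into both operands of the or,
// leaving a mask instead of a shl/lshr round trip.
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t a = 0; a != 256; ++a)
    for (uint32_t b = 0; b != 256; ++b) {
      uint32_t before = ((a << 8) | (b << 24)) >> 8;   // shl, shl, or, lshr
      uint32_t after  = (a & 0x00FFFFFFu) | (b << 16); // shifts folded away
      assert(before == after);
    }
  return 0;
}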
+ if (I.getOpcode() != Instruction::AShr && + CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { + DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" + " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); + + return ReplaceInstUsesWith(I, + GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); + } + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); @@ -288,39 +548,17 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ConstantInt::get(Ty, AmtSum)); } - if (ShiftOp->getOpcode() == Instruction::LShr && - I.getOpcode() == Instruction::AShr) { - if (AmtSum >= TypeBits) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. - return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); - } - - if (ShiftOp->getOpcode() == Instruction::AShr && - I.getOpcode() == Instruction::LShr) { - // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. - if (AmtSum >= TypeBits) - AmtSum = TypeBits-1; - - Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); - - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(), Mask)); - } - - // Okay, if we get here, one shift must be left, and the other shift must be - // right. See if the amounts are equal. if (ShiftAmt1 == ShiftAmt2) { // If we have ((X >>? C) << C), turn this into X & (-1 << C). - if (I.getOpcode() == Instruction::Shl) { + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),Mask)); } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). - if (I.getOpcode() == Instruction::LShr) { + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), Mask)); @@ -329,7 +567,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); @@ -340,7 +579,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::Shl); Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); @@ -355,9 +595,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; // (X >>? C1) << C2 --> X >>? 
(C1-C2) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::LShr || - ShiftOp->getOpcode() == Instruction::AShr); + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, ConstantInt::get(Ty, ShiftDiff)); @@ -367,8 +606,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { - assert(ShiftOp->getOpcode() == Instruction::Shl); + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index af2958f..e46c679 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -60,8 +60,8 @@ STATISTIC(NumSunkInst , "Number of instructions sunk"); char InstCombiner::ID = 0; -static RegisterPass<InstCombiner> -X("instcombine", "Combine redundant instructions"); +INITIALIZE_PASS(InstCombiner, "instcombine", + "Combine redundant instructions", false, false); void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreservedID(LCSSAID); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 9ae3786..a77d70c 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -34,7 +34,7 @@ namespace { bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid - EdgeProfiler() : ModulePass(&ID) {} + EdgeProfiler() : ModulePass(ID) {} virtual const char *getPassName() const { return "Edge Profiler"; @@ -43,8 +43,8 @@ namespace { } char EdgeProfiler::ID = 0; -static RegisterPass<EdgeProfiler> -X("insert-edge-profiling", "Insert instrumentation for edge profiling"); +INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling", + "Insert instrumentation for edge profiling", false, false); ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 41e3a39..8eec987 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -36,7 +36,7 @@ namespace { bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid - OptimalEdgeProfiler() : ModulePass(&ID) {} + OptimalEdgeProfiler() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(ProfileEstimatorPassID); @@ -50,9 +50,9 @@ namespace { } char OptimalEdgeProfiler::ID = 0; -static RegisterPass<OptimalEdgeProfiler> -X("insert-optimal-edge-profiling", - "Insert optimal instrumentation for edge profiling"); +INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling", + "Insert optimal instrumentation for edge profiling", + false, false); ModulePass *llvm::createOptimalEdgeProfilerPass() { return new OptimalEdgeProfiler(); diff --git 
a/contrib/llvm/lib/Transforms/Scalar/ABCD.cpp b/contrib/llvm/lib/Transforms/Scalar/ABCD.cpp deleted file mode 100644 index dcf14a6..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/ABCD.cpp +++ /dev/null @@ -1,1112 +0,0 @@ -//===------- ABCD.cpp - Removes redundant conditional branches ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass removes redundant branch instructions. This algorithm was -// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper -// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original -// Algorithm was created to remove array bound checks for strongly typed -// languages. This implementation expands the idea and removes any conditional -// branches that can be proved redundant, not only those used in array bound -// checks. With the SSI representation, each variable has a -// constraint. By analyzing these constraints we can prove that a branch is -// redundant. When a branch is proved redundant it means that -// one direction will always be taken; thus, we can change this branch into an -// unconditional jump. -// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination -// after ABCD to clean up the code. -// This implementation was created based on the implementation of the ABCD -// algorithm implemented for the compiler Jitrino. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "abcd" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/SSI.h" - -using namespace llvm; - -STATISTIC(NumBranchTested, "Number of conditional branches analyzed"); -STATISTIC(NumBranchRemoved, "Number of conditional branches removed"); - -namespace { - -class ABCD : public FunctionPass { - public: - static char ID; // Pass identification, replacement for typeid. - ABCD() : FunctionPass(&ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<SSI>(); - } - - bool runOnFunction(Function &F); - - private: - /// Keep track of whether we've modified the program yet. 
- bool modified; - - enum ProveResult { - False = 0, - Reduced = 1, - True = 2 - }; - - typedef ProveResult (*meet_function)(ProveResult, ProveResult); - static ProveResult max(ProveResult res1, ProveResult res2) { - return (ProveResult) std::max(res1, res2); - } - static ProveResult min(ProveResult res1, ProveResult res2) { - return (ProveResult) std::min(res1, res2); - } - - class Bound { - public: - Bound(APInt v, bool upper) : value(v), upper_bound(upper) {} - Bound(const Bound &b, int cnst) - : value(b.value - cnst), upper_bound(b.upper_bound) {} - Bound(const Bound &b, const APInt &cnst) - : value(b.value - cnst), upper_bound(b.upper_bound) {} - - /// Test if Bound is an upper bound - bool isUpperBound() const { return upper_bound; } - - /// Get the bitwidth of this bound - int32_t getBitWidth() const { return value.getBitWidth(); } - - /// Creates a Bound incrementing the one received - static Bound createIncrement(const Bound &b) { - return Bound(b.isUpperBound() ? b.value+1 : b.value-1, - b.upper_bound); - } - - /// Creates a Bound decrementing the one received - static Bound createDecrement(const Bound &b) { - return Bound(b.isUpperBound() ? b.value-1 : b.value+1, - b.upper_bound); - } - - /// Test if two bounds are equal - static bool eq(const Bound *a, const Bound *b) { - if (!a || !b) return false; - - assert(a->isUpperBound() == b->isUpperBound()); - return a->value == b->value; - } - - /// Test if val is less than or equal to Bound b - static bool leq(APInt val, const Bound &b) { - return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value); - } - - /// Test if Bound a is less then or equal to Bound - static bool leq(const Bound &a, const Bound &b) { - assert(a.isUpperBound() == b.isUpperBound()); - return a.isUpperBound() ? a.value.sle(b.value) : - a.value.sge(b.value); - } - - /// Test if Bound a is less then Bound b - static bool lt(const Bound &a, const Bound &b) { - assert(a.isUpperBound() == b.isUpperBound()); - return a.isUpperBound() ? a.value.slt(b.value) : - a.value.sgt(b.value); - } - - /// Test if Bound b is greater then or equal val - static bool geq(const Bound &b, APInt val) { - return leq(val, b); - } - - /// Test if Bound a is greater then or equal Bound b - static bool geq(const Bound &a, const Bound &b) { - return leq(b, a); - } - - private: - APInt value; - bool upper_bound; - }; - - /// This class is used to store results some parts of the graph, - /// so information does not need to be recalculated. 
The maximum false, - /// minimum true and minimum reduced results are stored - class MemoizedResultChart { - public: - MemoizedResultChart() {} - MemoizedResultChart(const MemoizedResultChart &other) { - if (other.max_false) - max_false.reset(new Bound(*other.max_false)); - if (other.min_true) - min_true.reset(new Bound(*other.min_true)); - if (other.min_reduced) - min_reduced.reset(new Bound(*other.min_reduced)); - } - - /// Returns the max false - const Bound *getFalse() const { return max_false.get(); } - - /// Returns the min true - const Bound *getTrue() const { return min_true.get(); } - - /// Returns the min reduced - const Bound *getReduced() const { return min_reduced.get(); } - - /// Return the stored result for this bound - ProveResult getResult(const Bound &bound) const; - - /// Stores a false found - void addFalse(const Bound &bound); - - /// Stores a true found - void addTrue(const Bound &bound); - - /// Stores a Reduced found - void addReduced(const Bound &bound); - - /// Clears redundant reduced - /// If a min_true is smaller than a min_reduced then the min_reduced - /// is unnecessary and then removed. It also works for min_reduced - /// begin smaller than max_false. - void clearRedundantReduced(); - - void clear() { - max_false.reset(); - min_true.reset(); - min_reduced.reset(); - } - - private: - OwningPtr<Bound> max_false, min_true, min_reduced; - }; - - /// This class stores the result found for a node of the graph, - /// so these results do not need to be recalculated, only searched for. - class MemoizedResult { - public: - /// Test if there is true result stored from b to a - /// that is less then the bound - bool hasTrue(Value *b, const Bound &bound) const { - const Bound *trueBound = map.lookup(b).getTrue(); - return trueBound && Bound::leq(*trueBound, bound); - } - - /// Test if there is false result stored from b to a - /// that is less then the bound - bool hasFalse(Value *b, const Bound &bound) const { - const Bound *falseBound = map.lookup(b).getFalse(); - return falseBound && Bound::leq(*falseBound, bound); - } - - /// Test if there is reduced result stored from b to a - /// that is less then the bound - bool hasReduced(Value *b, const Bound &bound) const { - const Bound *reducedBound = map.lookup(b).getReduced(); - return reducedBound && Bound::leq(*reducedBound, bound); - } - - /// Returns the stored bound for b - ProveResult getBoundResult(Value *b, const Bound &bound) { - return map[b].getResult(bound); - } - - /// Clears the map - void clear() { - DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin(); - DenseMapIterator<Value*, MemoizedResultChart> end = map.end(); - for (; begin != end; ++begin) { - begin->second.clear(); - } - map.clear(); - } - - /// Stores the bound found - void updateBound(Value *b, const Bound &bound, const ProveResult res); - - private: - // Maps a nod in the graph with its results found. - DenseMap<Value*, MemoizedResultChart> map; - }; - - /// This class represents an edge in the inequality graph used by the - /// ABCD algorithm. An edge connects node v to node u with a value c if - /// we could infer a constraint v <= u + c in the source program. 
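A concrete reading of that convention, sketched against the Edge interface just below (A stands for the Value* of a; a 32-bit width is assumed):
// Sketch: the constraint a <= b + 5 is carried by an Edge stored in b's
// adjacency list; the vertex names the other endpoint, the APInt the
// weight, and the flag selects the upper- or lower-bound graph.
Edge E(/*V=*/A, APInt(32, 5), /*upper=*/true);
Value *To = E.getVertex();          // a
const APInt &Weight = E.getValue(); // 5
bool Upper = E.isUpperBound();      // true: upper-bound graph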
- class Edge { - public: - Edge(Value *V, APInt val, bool upper) - : vertex(V), value(val), upper_bound(upper) {} - - Value *getVertex() const { return vertex; } - const APInt &getValue() const { return value; } - bool isUpperBound() const { return upper_bound; } - - private: - Value *vertex; - APInt value; - bool upper_bound; - }; - - /// Weighted and Directed graph to represent constraints. - /// There is one type of constraint, a <= b + X, which will generate an - /// edge from b to a with weight X. - class InequalityGraph { - public: - - /// Adds an edge from V_from to V_to with weight value - void addEdge(Value *V_from, Value *V_to, APInt value, bool upper); - - /// Test if there is a node V - bool hasNode(Value *V) const { return graph.count(V); } - - /// Test if there is any edge from V in the upper direction - bool hasEdge(Value *V, bool upper) const; - - /// Returns all edges pointed by vertex V - SmallVector<Edge, 16> getEdges(Value *V) const { - return graph.lookup(V); - } - - /// Prints the graph in dot format. - /// Blue edges represent upper bound and Red lower bound. - void printGraph(raw_ostream &OS, Function &F) const { - printHeader(OS, F); - printBody(OS); - printFooter(OS); - } - - /// Clear the graph - void clear() { - graph.clear(); - } - - private: - DenseMap<Value *, SmallVector<Edge, 16> > graph; - - /// Prints the header of the dot file - void printHeader(raw_ostream &OS, Function &F) const; - - /// Prints the footer of the dot file - void printFooter(raw_ostream &OS) const { - OS << "}\n"; - } - - /// Prints the body of the dot file - void printBody(raw_ostream &OS) const; - - /// Prints vertex source to the dot file - void printVertex(raw_ostream &OS, Value *source) const; - - /// Prints the edge to the dot file - void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const; - - void printName(raw_ostream &OS, Value *info) const; - }; - - /// Iterates through all BasicBlocks, if the Terminator Instruction - /// uses a Comparator Instruction, all operands of this comparator - /// are sent to be transformed to SSI. Only Instruction operands are - /// transformed. - void createSSI(Function &F); - - /// Creates the graphs for this function. - /// It will look for all comparators used in branches, and create them. - /// These comparators will create constraints for any instruction as an - /// operand. - void executeABCD(Function &F); - - /// Seeks redundancies in the comparator instruction CI. - /// If the ABCD algorithm can prove that the comparator CI always - /// takes one way, then the Terminator Instruction TI is substituted from - /// a conditional branch to an unconditional one. - /// This code basically receives a comparator, and verifies which kind of - /// instruction it is. Depending on the kind of instruction, we use different - /// strategies to prove its redundancy. - void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI); - - /// Substitutes Terminator Instruction TI, that is a conditional branch, - /// with one unconditional branch. Succ_edge determines if the new - /// unconditional edge will be the first or second edge of the former TI - /// instruction. - void removeRedundancy(TerminatorInst *TI, bool Succ_edge); - - /// When a conditional branch is removed, the BasicBlock that is no longer - /// reachable will have problems in phi functions. This method fixes these - /// phis by removing the former BasicBlock from the list of incoming BasicBlocks - /// of all phis. 
In case the phi remains with no predecessor it will be - /// marked to be removed later. - void fixPhi(BasicBlock *BB, BasicBlock *Succ); - - /// Removes phis that have no predecessor - void removePhis(); - - /// Creates constraints for Instructions. - /// If the constraint for this instruction has already been created - /// nothing is done. - void createConstraintInstruction(Instruction *I); - - /// Creates constraints for Binary Operators. - /// It will create constraints only for addition and subtraction, - /// the other binary operations are not treated by ABCD. - /// For additions in the form a = b + X and a = X + b, where X is a constant, - /// the constraint a <= b + X can be obtained. For this constraint, an edge - /// a->b with weight X is added to the lower bound graph, and an edge - /// b->a with weight -X is added to the upper bound graph. - /// Only subtractions in the format a = b - X are used by ABCD. - /// Edges are created using the same semantic as addition. - void createConstraintBinaryOperator(BinaryOperator *BO); - - /// Creates constraints for Comparator Instructions. - /// Only comparators that have any of the following operators - /// are used to create constraints: >=, >, <=, <. And only if - /// at least one operand is an Instruction. In a Comparator Instruction - /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f, where - /// t and f represent sigma for operands in true and false branches. The - /// following constraints can be obtained: a_t <= a, a_f <= a, b_t <= b and - /// b_f <= b. There are two more constraints that depend on the operator. - /// For the operator <= : a_t <= b_t and b_f <= a_f-1 - /// For the operator < : a_t <= b_t-1 and b_f <= a_f - /// For the operator >= : b_t <= a_t and a_f <= b_f-1 - /// For the operator > : b_t <= a_t-1 and a_f <= b_f - void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI); - - /// Creates constraints for PHI nodes. - /// In a PHI node a = phi(b,c) we can create the constraint - /// a<= max(b,c). With this constraint there will be the edges, - /// b->a and c->a with weight 0 in the lower bound graph, and the edges - /// a->b and a->c with weight 0 in the upper bound graph. - void createConstraintPHINode(PHINode *PN); - - /// Given a binary operator, we are only interested in the case - /// that one operand is an Instruction and the other is a ConstantInt. In - /// this case the method returns true, otherwise false. It also obtains the - /// Instruction and ConstantInt from the BinaryOperator and returns it. - bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2); - - /// This method creates a constraint between a Sigma and an Instruction. - /// These constraints are created as soon as we find a comparator that uses a - /// SSI variable. - void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, - BasicBlock *BB_succ_f, PHINode **SIG_op_t, - PHINode **SIG_op_f); - - /// If PN_op1 and PN_op2 are different from NULL, create a constraint - /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace - /// with the respective V_op#, if V_op# is a ConstantInt. - void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, - ConstantInt *V_op1, ConstantInt *V_op2, - APInt value); - - /// Returns the sigma representing the Instruction I in BasicBlock BB. - /// Returns NULL in case there is no sigma for this Instruction in this - /// Basic Block. 
This method assumes that sigmas are the first instructions - /// in a block, and that there can be only two sigmas in a block. So it will - /// only look at the first two instructions of BasicBlock BB. - PHINode *findSigma(BasicBlock *BB, Instruction *I); - - /// Original ABCD algorithm to prove redundant checks. - /// This implementation works on any kind of inequality branch. - bool demandProve(Value *a, Value *b, int c, bool upper_bound); - - /// Prove that distance between b and a is <= bound - ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level); - - /// Updates the distance value for a and b - void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level, - meet_function meet); - - InequalityGraph inequality_graph; - MemoizedResult mem_result; - DenseMap<Value*, const Bound*> active; - SmallPtrSet<Value*, 16> created; - SmallVector<PHINode *, 16> phis_to_remove; -}; - -} // end anonymous namespace. - -char ABCD::ID = 0; -static RegisterPass<ABCD> X("abcd", "ABCD: Eliminating Array Bounds Checks on Demand"); - - -bool ABCD::runOnFunction(Function &F) { - modified = false; - createSSI(F); - executeABCD(F); - DEBUG(inequality_graph.printGraph(dbgs(), F)); - removePhis(); - - inequality_graph.clear(); - mem_result.clear(); - active.clear(); - created.clear(); - phis_to_remove.clear(); - return modified; -} - -/// Iterates through all BasicBlocks, if the Terminator Instruction -/// uses a Comparator Instruction, all operands of this comparator -/// are sent to be transformed to SSI. Only Instruction operands are -/// transformed. -void ABCD::createSSI(Function &F) { - SSI *ssi = &getAnalysis<SSI>(); - - SmallVector<Instruction *, 16> Insts; - - for (Function::iterator begin = F.begin(), end = F.end(); - begin != end; ++begin) { - BasicBlock *BB = begin; - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumOperands() == 0) - continue; - - if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) { - if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) { - modified = true; // XXX: but yet createSSI might do nothing - Insts.push_back(I); - } - if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) { - modified = true; - Insts.push_back(I); - } - } - } - ssi->createSSI(Insts); -} - -/// Creates the graphs for this function. -/// It will look for all comparators used in branches, and create them. -/// These comparators will create constraints for any instruction as an -/// operand. -void ABCD::executeABCD(Function &F) { - for (Function::iterator begin = F.begin(), end = F.end(); - begin != end; ++begin) { - BasicBlock *BB = begin; - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumOperands() == 0) - continue; - - ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0)); - if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy()) - continue; - - createConstraintCmpInst(ICI, TI); - seekRedundancy(ICI, TI); - } -} - -/// Seeks redundancies in the comparator instruction CI. -/// If the ABCD algorithm can prove that the comparator CI always -/// takes one way, then the Terminator Instruction TI is substituted from -/// a conditional branch to an unconditional one. -/// This code basically receives a comparator, and verifies which kind of -/// instruction it is. Depending on the kind of instruction, we use different -/// strategies to prove its redundancy. 
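One row of the case analysis in the definition below, worked out (A and B are the compare operands, i.e. source and dest):
// For 'if (A < B)' (ICMP_SLT) the switch picks upper=true, distance1=-1,
// distance2=0, so the two proof attempts amount to:
if (demandProve(B, A, -1, /*upper_bound=*/true))
  removeRedundancy(TI, true);    // A - B <= -1 everywhere: always true
else if (demandProve(B, A, 0, /*upper_bound=*/false))
  removeRedundancy(TI, false);   // A - B >= 0 everywhere: always false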
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) { - CmpInst::Predicate Pred = ICI->getPredicate(); - - Value *source, *dest; - int distance1, distance2; - bool upper; - - switch(Pred) { - case CmpInst::ICMP_SGT: // signed greater than - upper = false; - distance1 = 1; - distance2 = 0; - break; - - case CmpInst::ICMP_SGE: // signed greater or equal - upper = false; - distance1 = 0; - distance2 = -1; - break; - - case CmpInst::ICMP_SLT: // signed less than - upper = true; - distance1 = -1; - distance2 = 0; - break; - - case CmpInst::ICMP_SLE: // signed less or equal - upper = true; - distance1 = 0; - distance2 = 1; - break; - - default: - return; - } - - ++NumBranchTested; - source = ICI->getOperand(0); - dest = ICI->getOperand(1); - if (demandProve(dest, source, distance1, upper)) { - removeRedundancy(TI, true); - } else if (demandProve(dest, source, distance2, !upper)) { - removeRedundancy(TI, false); - } -} - -/// Substitutes Terminator Instruction TI, that is a conditional branch, -/// with one unconditional branch. Succ_edge determines if the new -/// unconditional edge will be the first or second edge of the former TI -/// instruction. -void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) { - BasicBlock *Succ; - if (Succ_edge) { - Succ = TI->getSuccessor(0); - fixPhi(TI->getParent(), TI->getSuccessor(1)); - } else { - Succ = TI->getSuccessor(1); - fixPhi(TI->getParent(), TI->getSuccessor(0)); - } - - BranchInst::Create(Succ, TI); - TI->eraseFromParent(); // XXX: invoke - ++NumBranchRemoved; - modified = true; -} - -/// When a conditional branch is removed, the BasicBlock that is no longer -/// reachable will have problems in phi functions. This method fixes these -/// phis by removing the former BasicBlock from the list of incoming BasicBlocks -/// of all phis. In case the phi remains with no predecessor it will be -/// marked to be removed later. -void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) { - BasicBlock::iterator begin = Succ->begin(); - while (PHINode *PN = dyn_cast<PHINode>(begin++)) { - PN->removeIncomingValue(BB, false); - if (PN->getNumIncomingValues() == 0) - phis_to_remove.push_back(PN); - } -} - -/// Removes phis that have no predecessor -void ABCD::removePhis() { - for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) { - PHINode *PN = phis_to_remove[i]; - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - PN->eraseFromParent(); - } -} - -/// Creates constraints for Instructions. -/// If the constraint for this instruction has already been created -/// nothing is done. -void ABCD::createConstraintInstruction(Instruction *I) { - // Test if this instruction has not been created before - if (created.insert(I)) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - createConstraintBinaryOperator(BO); - } else if (PHINode *PN = dyn_cast<PHINode>(I)) { - createConstraintPHINode(PN); - } - } -} - -/// Creates constraints for Binary Operators. -/// It will create constraints only for addition and subtraction, -/// the other binary operations are not treated by ABCD. -/// For additions in the form a = b + X and a = X + b, where X is a constant, -/// the constraint a <= b + X can be obtained. For this constraint, an edge -/// a->b with weight X is added to the lower bound graph, and an edge -/// b->a with weight -X is added to the upper bound graph. -/// Only subtractions in the format a = b - X are used by ABCD. -/// Edges are created using the same semantic as addition. 
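A concrete instance of those rules, mirroring the calls in the definition below (I is the operand b, BO the result a; 32-bit operands assumed):
// 'a = b + 5' records a <= b + 5 and b <= a - 5, one edge per graph:
APInt X(32, 5);
inequality_graph.addEdge(I, BO, X, true);    // upper-bound graph
inequality_graph.addEdge(BO, I, -X, false);  // lower-bound graph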
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) { - Instruction *I1 = NULL, *I2 = NULL; - ConstantInt *CI1 = NULL, *CI2 = NULL; - - // Test if an operand is an Instruction and the other is a Constant - if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2)) - return; - - Instruction *I = 0; - APInt value; - - switch (BO->getOpcode()) { - case Instruction::Add: - if (I1) { - I = I1; - value = CI2->getValue(); - } else if (I2) { - I = I2; - value = CI1->getValue(); - } - break; - - case Instruction::Sub: - // Instructions like a = X-b, where X is a constant are not represented - // in the graph. - if (!I1) - return; - - I = I1; - value = -CI2->getValue(); - break; - - default: - return; - } - - inequality_graph.addEdge(I, BO, value, true); - inequality_graph.addEdge(BO, I, -value, false); - createConstraintInstruction(I); -} - -/// Given a binary operator, we are only interested in the case -/// that one operand is an Instruction and the other is a ConstantInt. In -/// this case the method returns true, otherwise false. It also obtains the -/// Instruction and ConstantInt from the BinaryOperator and returns it. -bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2) { - Value *op1 = BO->getOperand(0); - Value *op2 = BO->getOperand(1); - - if ((*I1 = dyn_cast<Instruction>(op1))) { - if ((*C2 = dyn_cast<ConstantInt>(op2))) - return true; // First is Instruction and second ConstantInt - - return false; // Both are Instruction - } else { - if ((*C1 = dyn_cast<ConstantInt>(op1)) && - (*I2 = dyn_cast<Instruction>(op2))) - return true; // First is ConstantInt and second Instruction - - return false; // Both are not Instruction - } -} - -/// Creates constraints for Comparator Instructions. -/// Only comparators that have any of the following operators -/// are used to create constraints: >=, >, <=, <. And only if -/// at least one operand is an Instruction. In a Comparator Instruction -/// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f, where -/// t and f represent sigma for operands in true and false branches. The -/// following constraints can be obtained: a_t <= a, a_f <= a, b_t <= b and -/// b_f <= b. There are two more constraints that depend on the operator. 
-/// For the operator <= : a_t <= b_t and b_f <= a_f-1 -/// For the operator < : a_t <= b_t-1 and b_f <= a_f -/// For the operator >= : b_t <= a_t and a_f <= b_f-1 -/// For the operator > : b_t <= a_t-1 and a_f <= b_f -void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { - Value *V_op1 = ICI->getOperand(0); - Value *V_op2 = ICI->getOperand(1); - - if (!V_op1->getType()->isIntegerTy()) - return; - - Instruction *I_op1 = dyn_cast<Instruction>(V_op1); - Instruction *I_op2 = dyn_cast<Instruction>(V_op2); - - // Test if at least one operand is an Instruction - if (!I_op1 && !I_op2) - return; - - BasicBlock *BB_succ_t = TI->getSuccessor(0); - BasicBlock *BB_succ_f = TI->getSuccessor(1); - - PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL, - *SIG_op2_t = NULL, *SIG_op2_f = NULL; - - createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f); - createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f); - - int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth(); - APInt MinusOne = APInt::getAllOnesValue(width); - APInt Zero = APInt::getNullValue(width); - - CmpInst::Predicate Pred = ICI->getPredicate(); - ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1); - ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2); - switch (Pred) { - case CmpInst::ICMP_SGT: // signed greater than - createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero); - break; - - case CmpInst::ICMP_SGE: // signed greater or equal - createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne); - break; - - case CmpInst::ICMP_SLT: // signed less than - createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero); - break; - - case CmpInst::ICMP_SLE: // signed less or equal - createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne); - break; - - default: - break; - } - - if (I_op1) - createConstraintInstruction(I_op1); - if (I_op2) - createConstraintInstruction(I_op2); -} - -/// Creates constraints for PHI nodes. -/// In a PHI node a = phi(b,c) we can create the constraint -/// a<= max(b,c). With this constraint there will be the edges, -/// b->a and c->a with weight 0 in the lower bound graph, and the edges -/// a->b and a->c with weight 0 in the upper bound graph. -void ABCD::createConstraintPHINode(PHINode *PN) { - // FIXME: We really want to disallow sigma nodes, but I don't know the best - // way to detect the other than this. - if (PN->getNumOperands() == 2) return; - - int32_t width = cast<IntegerType>(PN->getType())->getBitWidth(); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = PN->getIncomingValue(i); - if (Instruction *I = dyn_cast<Instruction>(V)) { - createConstraintInstruction(I); - } - inequality_graph.addEdge(V, PN, APInt(width, 0), true); - inequality_graph.addEdge(V, PN, APInt(width, 0), false); - } -} - -/// This method creates a constraint between a Sigma and an Instruction. -/// These constraints are created as soon as we find a comparator that uses a -/// SSI variable. 
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, - BasicBlock *BB_succ_f, PHINode **SIG_op_t, - PHINode **SIG_op_f) { - *SIG_op_t = findSigma(BB_succ_t, I_op); - *SIG_op_f = findSigma(BB_succ_f, I_op); - - if (*SIG_op_t) { - int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth(); - inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true); - inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false); - } - if (*SIG_op_f) { - int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth(); - inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true); - inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false); - } -} - -/// If PN_op1 and PN_op2 are different from NULL, create a constraint -/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace -/// with the respective V_op#, if V_op# is a ConstantInt. -void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, - ConstantInt *V_op1, ConstantInt *V_op2, - APInt value) { - if (SIG_op1 && SIG_op2) { - inequality_graph.addEdge(SIG_op2, SIG_op1, value, true); - inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false); - } else if (SIG_op1 && V_op2) { - inequality_graph.addEdge(V_op2, SIG_op1, value, true); - inequality_graph.addEdge(SIG_op1, V_op2, -value, false); - } else if (SIG_op2 && V_op1) { - inequality_graph.addEdge(SIG_op2, V_op1, value, true); - inequality_graph.addEdge(V_op1, SIG_op2, -value, false); - } -} - -/// Returns the sigma representing the Instruction I in BasicBlock BB. -/// Returns NULL in case there is no sigma for this Instruction in this -/// Basic Block. This method assumes that sigmas are the first instructions -/// in a block, and that there can be only two sigmas in a block. So it will -/// only look at the first two instructions of BasicBlock BB. -PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) { - // If BB has more than one predecessor, BB cannot have sigmas. - if (I == NULL || BB->getSinglePredecessor() == NULL) - return NULL; - - BasicBlock::iterator begin = BB->begin(); - BasicBlock::iterator end = BB->end(); - - for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) { - Instruction *I_succ = begin; - if (PHINode *PN = dyn_cast<PHINode>(I_succ)) - if (PN->getIncomingValue(0) == I) - return PN; - } - - return NULL; -} - -/// Original ABCD algorithm to prove redundant checks. -/// This implementation works on any kind of inequality branch. 
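How the prover below is meant to be read (a sketch; A, B, Width, C and Upper stand for a query already in hand):
// ProveResult is a small lattice: False < Reduced < True. A cycle that
// cannot strengthen the bound reports Reduced instead of True, and
// demandProve() accepts anything but False as success:
ProveResult R = prove(A, B, Bound(APInt(Width, C), Upper), /*level=*/0);
bool Redundant = (R != False);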
-bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) { - int32_t width = cast<IntegerType>(a->getType())->getBitWidth(); - Bound bound(APInt(width, c), upper_bound); - - mem_result.clear(); - active.clear(); - - ProveResult res = prove(a, b, bound, 0); - return res != False; -} - -/// Prove that distance between b and a is <= bound -ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound, - unsigned level) { - // if (C[b-a<=e] == True for some e <= bound - // Same or stronger difference was already proven - if (mem_result.hasTrue(b, bound)) - return True; - - // if (C[b-a<=e] == False for some e >= bound - // Same or weaker difference was already disproved - if (mem_result.hasFalse(b, bound)) - return False; - - // if (C[b-a<=e] == Reduced for some e <= bound - // b is on a cycle that was reduced for same or stronger difference - if (mem_result.hasReduced(b, bound)) - return Reduced; - - // traversal reached the source vertex - if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true))) - return True; - - // if b has no predecessor then fail - if (!inequality_graph.hasEdge(b, bound.isUpperBound())) - return False; - - // a cycle was encountered - if (active.count(b)) { - if (Bound::leq(*active.lookup(b), bound)) - return Reduced; // a "harmless" cycle - - return False; // an amplifying cycle - } - - active[b] = &bound; - PHINode *PN = dyn_cast<PHINode>(b); - - // Test if a Value is a Phi. If it is a PHINode with more than 1 incoming - // value, then it is a phi, if it has 1 incoming value it is a sigma. - if (PN && PN->getNumIncomingValues() > 1) - updateMemDistance(a, b, bound, level, min); - else - updateMemDistance(a, b, bound, level, max); - - active.erase(b); - - ABCD::ProveResult res = mem_result.getBoundResult(b, bound); - return res; -} - -/// Updates the distance value for a and b -void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound, - unsigned level, meet_function meet) { - ABCD::ProveResult res = (meet == max) ? 
False : True; - - SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b); - SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end(); - - for (; begin != end ; ++begin) { - if (((res >= Reduced) && (meet == max)) || - ((res == False) && (meet == min))) { - break; - } - const Edge &in = *begin; - if (in.isUpperBound() == bound.isUpperBound()) { - Value *succ = in.getVertex(); - res = meet(res, prove(a, succ, Bound(bound, in.getValue()), - level+1)); - } - } - - mem_result.updateBound(b, bound, res); -} - -/// Return the stored result for this bound -ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{ - if (max_false && Bound::leq(bound, *max_false)) - return False; - if (min_true && Bound::leq(*min_true, bound)) - return True; - if (min_reduced && Bound::leq(*min_reduced, bound)) - return Reduced; - return False; -} - -/// Stores a false found -void ABCD::MemoizedResultChart::addFalse(const Bound &bound) { - if (!max_false || Bound::leq(*max_false, bound)) - max_false.reset(new Bound(bound)); - - if (Bound::eq(max_false.get(), min_reduced.get())) - min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced))); - if (Bound::eq(max_false.get(), min_true.get())) - min_true.reset(new Bound(Bound::createIncrement(*min_true))); - if (Bound::eq(min_reduced.get(), min_true.get())) - min_reduced.reset(); - clearRedundantReduced(); -} - -/// Stores a true found -void ABCD::MemoizedResultChart::addTrue(const Bound &bound) { - if (!min_true || Bound::leq(bound, *min_true)) - min_true.reset(new Bound(bound)); - - if (Bound::eq(min_true.get(), min_reduced.get())) - min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced))); - if (Bound::eq(min_true.get(), max_false.get())) - max_false.reset(new Bound(Bound::createDecrement(*max_false))); - if (Bound::eq(max_false.get(), min_reduced.get())) - min_reduced.reset(); - clearRedundantReduced(); -} - -/// Stores a Reduced found -void ABCD::MemoizedResultChart::addReduced(const Bound &bound) { - if (!min_reduced || Bound::leq(bound, *min_reduced)) - min_reduced.reset(new Bound(bound)); - - if (Bound::eq(min_reduced.get(), min_true.get())) - min_true.reset(new Bound(Bound::createIncrement(*min_true))); - if (Bound::eq(min_reduced.get(), max_false.get())) - max_false.reset(new Bound(Bound::createDecrement(*max_false))); -} - -/// Clears redundant reduced -/// If a min_true is smaller than a min_reduced then the min_reduced -/// is unnecessary and then removed. It also works for min_reduced -/// begin smaller than max_false. 
-void ABCD::MemoizedResultChart::clearRedundantReduced() { - if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced)) - min_reduced.reset(); - if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false)) - min_reduced.reset(); -} - -/// Stores the bound found -void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound, - const ProveResult res) { - if (res == False) { - map[b].addFalse(bound); - } else if (res == True) { - map[b].addTrue(bound); - } else { - map[b].addReduced(bound); - } -} - -/// Adds an edge from V_from to V_to with weight value -void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from, - APInt value, bool upper) { - assert(V_from->getType() == V_to->getType()); - assert(cast<IntegerType>(V_from->getType())->getBitWidth() == - value.getBitWidth()); - - graph[V_from].push_back(Edge(V_to, value, upper)); -} - -/// Test if there is any edge from V in the upper direction -bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const { - SmallVector<Edge, 16> it = graph.lookup(V); - - SmallVector<Edge, 16>::iterator begin = it.begin(); - SmallVector<Edge, 16>::iterator end = it.end(); - for (; begin != end; ++begin) { - if (begin->isUpperBound() == upper) { - return true; - } - } - return false; -} - -/// Prints the header of the dot file -void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const { - OS << "digraph dotgraph {\n"; - OS << "label=\"Inequality Graph for \'"; - OS << F.getNameStr() << "\' function\";\n"; - OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n"; -} - -/// Prints the body of the dot file -void ABCD::InequalityGraph::printBody(raw_ostream &OS) const { - DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin = - graph.begin(), end = graph.end(); - - for (; begin != end ; ++begin) { - SmallVector<Edge, 16>::const_iterator begin_par = - begin->second.begin(), end_par = begin->second.end(); - Value *source = begin->first; - - printVertex(OS, source); - - for (; begin_par != end_par ; ++begin_par) { - const Edge &edge = *begin_par; - printEdge(OS, source, edge); - } - } -} - -/// Prints vertex source to the dot file -/// -void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const { - OS << "\""; - printName(OS, source); - OS << "\""; - OS << " [label=\"{"; - printName(OS, source); - OS << "}\"];\n"; -} - -/// Prints the edge to the dot file -void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source, - const Edge &edge) const { - Value *dest = edge.getVertex(); - APInt value = edge.getValue(); - bool upper = edge.isUpperBound(); - - OS << "\""; - printName(OS, source); - OS << "\""; - OS << " -> "; - OS << "\""; - printName(OS, dest); - OS << "\""; - OS << " [label=\"" << value << "\""; - if (upper) { - OS << "color=\"blue\""; - } else { - OS << "color=\"red\""; - } - OS << "];\n"; -} - -void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const { - if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) { - OS << *CI; - } else { - if (!info->hasName()) { - info->setName("V"); - } - OS << info->getNameStr(); - } -} - -/// createABCDPass - The public interface to this file... 
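For context, the factory below was the file's only public entry point; under the legacy PassManager a client would have scheduled it roughly as follows (a sketch; the file header above already advises running SimplifyCFG and ADCE afterwards):
PassManager PM;
PM.add(createABCDPass());
PM.add(createCFGSimplificationPass());  // clean up the unconditional jumps
PM.add(createAggressiveDCEPass());      // remove the now-dead branch code
PM.run(M);                              // M is the Module being optimized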
-FunctionPass *llvm::createABCDPass() { - return new ABCD(); -} diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp index 2d19467..ada086e 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -33,7 +33,7 @@ STATISTIC(NumRemoved, "Number of instructions removed"); namespace { struct ADCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ADCE() : FunctionPass(&ID) {} + ADCE() : FunctionPass(ID) {} virtual bool runOnFunction(Function& F); @@ -45,7 +45,7 @@ namespace { } char ADCE::ID = 0; -static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination"); +INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false); bool ADCE::runOnFunction(Function& F) { SmallPtrSet<Instruction*, 128> alive; diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp index 54533f5..b144678 100644 --- a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -41,7 +41,7 @@ STATISTIC(NumMoved, "Number of basic blocks moved"); namespace { struct BlockPlacement : public FunctionPass { static char ID; // Pass identification, replacement for typeid - BlockPlacement() : FunctionPass(&ID) {} + BlockPlacement() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -74,8 +74,8 @@ namespace { } char BlockPlacement::ID = 0; -static RegisterPass<BlockPlacement> -X("block-placement", "Profile Guided Basic Block Placement"); +INITIALIZE_PASS(BlockPlacement, "block-placement", + "Profile Guided Basic Block Placement", false, false); FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt b/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt index 1a3b10c..b7598ea 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -1,9 +1,9 @@ add_llvm_library(LLVMScalarOpts - ABCD.cpp ADCE.cpp BasicBlockPlacement.cpp CodeGenPrepare.cpp ConstantProp.cpp + CorrelatedValuePropagation.cpp DCE.cpp DeadStoreElimination.cpp GEPSplitter.cpp @@ -17,6 +17,7 @@ add_llvm_library(LLVMScalarOpts LoopStrengthReduce.cpp LoopUnrollPass.cpp LoopUnswitch.cpp + LowerAtomic.cpp MemCpyOptimizer.cpp Reassociate.cpp Reg2Mem.cpp diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index 272066c..e07b761 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -41,6 +42,11 @@ using namespace llvm; using namespace llvm::PatternMatch; +static cl::opt<bool> +CriticalEdgeSplit("cgp-critical-edge-splitting", + cl::desc("Split critical edges during codegen prepare"), + cl::init(true), cl::Hidden); + namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -54,7 +60,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) - 
: FunctionPass(&ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) {} bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -82,8 +88,8 @@ namespace { } char CodeGenPrepare::ID = 0; -static RegisterPass<CodeGenPrepare> X("codegenprepare", - "Optimize for code generation"); +INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false); FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); @@ -427,9 +433,9 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they // are. - if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) + if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); - if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) + if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. @@ -548,9 +554,9 @@ protected: CI->eraseFromParent(); } bool isFoldable(unsigned SizeCIOp, unsigned, bool) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - - CallInst::ArgOffset))) - return SizeCI->isAllOnesValue(); + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) + return SizeCI->isAllOnesValue(); return false; } }; @@ -891,12 +897,14 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { bool MadeChange = false; // Split all critical edges where the dest block has a PHI. 
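The hunk below puts this logic behind the new cl::opt declared at the top of the file; sketched, the gate reads as:
// CriticalEdgeSplit defaults to true (cl::init above) and can be flipped
// without rebuilding, e.g. 'opt -cgp-critical-edge-splitting=false'.
if (CriticalEdgeSplit) {
  // ... split critical edges whose destination starts with a PHI ...
}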
- TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); + if (CriticalEdgeSplit) { + TerminatorInst *BBTI = BB.getTerminator(); + if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { + for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { + BasicBlock *SuccBB = BBTI->getSuccessor(i); + if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) + SplitEdgeNicely(BBTI, i, BackEdges, this); + } } } diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp index ea20813..a0ea369 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp @@ -34,7 +34,7 @@ STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { struct ConstantPropagation : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ConstantPropagation() : FunctionPass(&ID) {} + ConstantPropagation() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -45,8 +45,8 @@ namespace { } char ConstantPropagation::ID = 0; -static RegisterPass<ConstantPropagation> -X("constprop", "Simple constant propagation"); +INITIALIZE_PASS(ConstantPropagation, "constprop", + "Simple constant propagation", false, false); FunctionPass *llvm::createConstantPropagationPass() { return new ConstantPropagation(); diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp new file mode 100644 index 0000000..0d4e45d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -0,0 +1,200 @@ +//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Correlated Value Propagation pass. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "correlated-value-propagation" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPhis, "Number of phis propagated"); +STATISTIC(NumSelects, "Number of selects propagated"); +STATISTIC(NumMemAccess, "Number of memory access targets propagated"); +STATISTIC(NumCmps, "Number of comparisons propagated"); + +namespace { + class CorrelatedValuePropagation : public FunctionPass { + LazyValueInfo *LVI; + + bool processSelect(SelectInst *SI); + bool processPHI(PHINode *P); + bool processMemAccess(Instruction *I); + bool processCmp(CmpInst *C); + + public: + static char ID; + CorrelatedValuePropagation(): FunctionPass(ID) { } + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LazyValueInfo>(); + } + }; +} + +char CorrelatedValuePropagation::ID = 0; +INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation", + "Value Propagation", false, false); + +// Public interface to the Value Propagation pass +Pass *llvm::createCorrelatedValuePropagationPass() { + return new CorrelatedValuePropagation(); +} + +bool CorrelatedValuePropagation::processSelect(SelectInst *S) { + if (S->getType()->isVectorTy()) return false; + if (isa<Constant>(S->getOperand(0))) return false; + + Constant *C = LVI->getConstant(S->getOperand(0), S->getParent()); + if (!C) return false; + + ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) return false; + + S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2)); + S->eraseFromParent(); + + ++NumSelects; + + return true; +} + +bool CorrelatedValuePropagation::processPHI(PHINode *P) { + bool Changed = false; + + BasicBlock *BB = P->getParent(); + for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + Value *Incoming = P->getIncomingValue(i); + if (isa<Constant>(Incoming)) continue; + + Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), + P->getIncomingBlock(i), + BB); + if (!C) continue; + + P->setIncomingValue(i, C); + Changed = true; + } + + if (Value *ConstVal = P->hasConstantValue()) { + P->replaceAllUsesWith(ConstVal); + P->eraseFromParent(); + Changed = true; + } + + ++NumPhis; + + return Changed; +} + +bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { + Value *Pointer = 0; + if (LoadInst *L = dyn_cast<LoadInst>(I)) + Pointer = L->getPointerOperand(); + else + Pointer = cast<StoreInst>(I)->getPointerOperand(); + + if (isa<Constant>(Pointer)) return false; + + Constant *C = LVI->getConstant(Pointer, I->getParent()); + if (!C) return false; + + ++NumMemAccess; + I->replaceUsesOfWith(Pointer, C); + return true; +} + +/// processCmp - If the value of this comparison could be determined locally, +/// constant propagation would already have figured it out. Instead, walk +/// the predecessors and statically evaluate the comparison based on information +/// available on that edge. If a given static evaluation is true on ALL +/// incoming edges, then it's true universally and we can simplify the compare. 
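An illustrative scenario for the routine below (IR names hypothetical):
// If the only way into %bb is the true edge of
//   br i1 (icmp slt i32 %x, 0), label %bb, label %other
// then LVI knows %x < 0 on that edge, so a compare inside %bb such as
//   %c = icmp slt i32 %x, 10
// is True on every incoming edge and folds to 'true', even though %bb
// itself carries no local information about %x.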
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) { + Value *Op0 = C->getOperand(0); + if (isa<Instruction>(Op0) && + cast<Instruction>(Op0)->getParent() == C->getParent()) + return false; + + Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); + if (!Op1) return false; + + pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent()); + if (PI == PE) return false; + + LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), + C->getOperand(0), Op1, *PI, C->getParent()); + if (Result == LazyValueInfo::Unknown) return false; + + ++PI; + while (PI != PE) { + LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), + C->getOperand(0), Op1, *PI, C->getParent()); + if (Res != Result) return false; + ++PI; + } + + ++NumCmps; + + if (Result == LazyValueInfo::True) + C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext())); + else + C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext())); + + C->eraseFromParent(); + + return true; +} + +bool CorrelatedValuePropagation::runOnFunction(Function &F) { + LVI = &getAnalysis<LazyValueInfo>(); + + bool FnChanged = false; + + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + bool BBChanged = false; + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { + Instruction *II = BI++; + switch (II->getOpcode()) { + case Instruction::Select: + BBChanged |= processSelect(cast<SelectInst>(II)); + break; + case Instruction::PHI: + BBChanged |= processPHI(cast<PHINode>(II)); + break; + case Instruction::ICmp: + case Instruction::FCmp: + BBChanged |= processCmp(cast<CmpInst>(II)); + break; + case Instruction::Load: + case Instruction::Store: + BBChanged |= processMemAccess(II); + break; + } + } + + // Propagating correlated values might leave cruft around. + // Try to clean it up before we continue. + if (BBChanged) + SimplifyInstructionsInBlock(FI); + + FnChanged |= BBChanged; + } + + return FnChanged; +} diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp index 39940c3..87ea803 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp @@ -35,7 +35,7 @@ namespace { // struct DeadInstElimination : public BasicBlockPass { static char ID; // Pass identification, replacement for typeid - DeadInstElimination() : BasicBlockPass(&ID) {} + DeadInstElimination() : BasicBlockPass(ID) {} virtual bool runOnBasicBlock(BasicBlock &BB) { bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { @@ -56,8 +56,8 @@ namespace { } char DeadInstElimination::ID = 0; -static RegisterPass<DeadInstElimination> -X("die", "Dead Instruction Elimination"); +INITIALIZE_PASS(DeadInstElimination, "die", + "Dead Instruction Elimination", false, false); Pass *llvm::createDeadInstEliminationPass() { return new DeadInstElimination(); @@ -70,7 +70,7 @@ namespace { // struct DCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - DCE() : FunctionPass(&ID) {} + DCE() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -81,7 +81,7 @@ namespace { } char DCE::ID = 0; -static RegisterPass<DCE> Y("dce", "Dead Code Elimination"); +INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false); bool DCE::runOnFunction(Function &F) { // Start out with all of the instructions in the worklist... 
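The elided body of DCE::runOnFunction is the classic worklist algorithm; a self-contained sketch (isInstructionTriviallyDead lives in llvm/Transforms/Utils/Local.h):
std::vector<Instruction*> WorkList;
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
  WorkList.push_back(&*I);               // seed with every instruction
while (!WorkList.empty()) {
  Instruction *I = WorkList.back();
  WorkList.pop_back();
  if (isInstructionTriviallyDead(I)) {
    // Operands may become dead once I goes away; queue them for a revisit.
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
      if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
        WorkList.push_back(Op);
    // Purge duplicate queue entries before erasing so no dangling
    // pointers remain, then delete the instruction itself.
    WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
                   WorkList.end());
    I->eraseFromParent();
  }
}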
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index e047e4f..c8fd9d9 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -40,7 +40,7 @@ namespace { TargetData *TD; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(&ID) {} + DSE() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) { bool Changed = false; @@ -82,7 +82,7 @@ namespace { } char DSE::ID = 0; -static RegisterPass<DSE> X("dse", "Dead Store Elimination"); +INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false); FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } @@ -401,10 +401,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } continue; - } else if (CallSite::get(BBI).getInstruction() != 0) { + } else if (CallSite CS = cast<Value>(BBI)) { // If this call does not access memory, it can't // be undeadifying any of our pointers. - CallSite CS = CallSite::get(BBI); if (AA.doesNotAccessMemory(CS)) continue; diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp index 610a41d..53dd06d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp @@ -27,13 +27,13 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const; public: static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(&ID) {} + explicit GEPSplitter() : FunctionPass(ID) {} }; } char GEPSplitter::ID = 0; -static RegisterPass<GEPSplitter> X("split-geps", - "split complex GEPs into simple GEPs"); +INITIALIZE_PASS(GEPSplitter, "split-geps", + "split complex GEPs into simple GEPs", false, false); FunctionPass *llvm::createGEPSplitterPass() { return new GEPSplitter(); diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index 88b6776..c62ce1f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -165,7 +165,6 @@ namespace { Expression create_expression(CastInst* C); Expression create_expression(GetElementPtrInst* G); Expression create_expression(CallInst* C); - Expression create_expression(Constant* C); Expression create_expression(ExtractValueInst* C); Expression create_expression(InsertValueInst* C); @@ -665,7 +664,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) - : FunctionPass(&ID), NoLoads(noloads), MD(0) { } + : FunctionPass(ID), NoLoads(noloads), MD(0) { } private: bool NoLoads; @@ -716,8 +715,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { return new GVN(NoLoads); } -static RegisterPass<GVN> X("gvn", - "Global Value Numbering"); +INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false); void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; @@ -735,7 +733,7 @@ static bool isSafeReplacement(PHINode* p, Instruction *inst) { for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); UI != E; ++UI) - if (PHINode* use_phi = dyn_cast<PHINode>(UI)) + if (PHINode* use_phi = dyn_cast<PHINode>(*UI)) if (use_phi->getParent() == inst->getParent()) return false; @@ -1312,7 +1310,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // Otherwise, we have to construct SSA form. 
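The first GVN hunk below tracks an SSAUpdater interface change: Initialize now takes the value type and a name hint instead of a prototype instruction. Minimal usage under the new signature (DefBB, StoredVal and UseBB are placeholders):
SmallVector<PHINode*, 8> InsertedPHIs;
SSAUpdater Updater(&InsertedPHIs);
Updater.Initialize(LI->getType(), LI->getName());
Updater.AddAvailableValue(DefBB, StoredVal);          // one def per block
Value *Live = Updater.GetValueInMiddleOfBlock(UseBB); // query a use point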
SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); - SSAUpdate.Initialize(LI); + SSAUpdate.Initialize(LI->getType(), LI->getName()); const Type *LoadTy = LI->getType(); @@ -2112,6 +2110,11 @@ bool GVN::performPRE(Function &F) { CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa<DbgInfoIntrinsic>(CurInst)) continue; + + // We don't currently value number ANY inline asm calls. + if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) + if (CallI->isInlineAsm()) + continue; uint32_t ValNo = VN.lookup(CurInst); diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index b5c9dd8..af2eafc 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -77,7 +77,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(&ID) {} + IndVarSimplify() : LoopPass(ID) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -102,7 +102,7 @@ namespace { void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - Value *IndVar, + PHINode *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter); @@ -117,8 +117,8 @@ namespace { } char IndVarSimplify::ID = 0; -static RegisterPass<IndVarSimplify> -X("indvars", "Canonicalize Induction Variables"); +INITIALIZE_PASS(IndVarSimplify, "indvars", + "Canonicalize Induction Variables", false, false); Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); @@ -131,7 +131,7 @@ Pass *llvm::createIndVarSimplifyPass() { /// is actually a much broader range than just linear tests. ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - Value *IndVar, + PHINode *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter) { @@ -181,7 +181,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - CmpIndVar = L->getCanonicalInductionVariableIncrement(); + CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock); } else { // We have to use the preincremented value... RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, @@ -534,7 +534,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. - Value *IndVar = 0; + PHINode *IndVar = 0; if (NeedCannIV) { // Check to see if the loop already has any canonical-looking induction // variables. If any are present and wider than the planned canonical @@ -862,9 +862,9 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // Check Incr uses. One user is PN and the other user is an exit condition // used by the conditional terminator. Value::use_iterator IncrUse = Incr->use_begin(); - Instruction *U1 = cast<Instruction>(IncrUse++); + Instruction *U1 = cast<Instruction>(*IncrUse++); if (IncrUse == Incr->use_end()) return; - Instruction *U2 = cast<Instruction>(IncrUse++); + Instruction *U2 = cast<Instruction>(*IncrUse++); if (IncrUse != Incr->use_end()) return; // Find exit condition, which is an fcmp. 
If it doesn't exist, or if it isn't diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index edce14c..104d5ae 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -45,7 +46,10 @@ Threshold("jump-threading-threshold", // Turn on use of LazyValueInfo. static cl::opt<bool> -EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden); +EnableLVI("enable-jump-threading-lvi", + cl::desc("Use LVI for jump threading"), + cl::init(true), + cl::ReallyHidden); @@ -74,15 +78,32 @@ namespace { #else SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; #endif + DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; + + // RAII helper for updating the recursion stack. + struct RecursionSetRemover { + DenseSet<std::pair<Value*, BasicBlock*> > &TheSet; + std::pair<Value*, BasicBlock*> ThePair; + + RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S, + std::pair<Value*, BasicBlock*> P) + : TheSet(S), ThePair(P) { } + + ~RecursionSetRemover() { + TheSet.erase(ThePair); + } + }; public: static char ID; // Pass identification - JumpThreading() : FunctionPass(&ID) {} + JumpThreading() : FunctionPass(ID) {} bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - if (EnableLVI) + if (EnableLVI) { AU.addRequired<LazyValueInfo>(); + AU.addPreserved<LazyValueInfo>(); + } } void FindLoopHeaders(Function &F); @@ -111,8 +132,8 @@ namespace { } char JumpThreading::ID = 0; -static RegisterPass<JumpThreading> -X("jump-threading", "Jump Threading"); +INITIALIZE_PASS(JumpThreading, "jump-threading", + "Jump Threading", false, false); // Public interface to the Jump Threading pass FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } @@ -144,6 +165,7 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); + if (LVI) LVI->eraseBlock(BB); DeleteDeadBlock(BB); Changed = true; } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -164,6 +186,11 @@ bool JumpThreading::runOnFunction(Function &F) { bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); BasicBlock *Succ = BI->getSuccessor(0); + // FIXME: It is always conservatively correct to drop the info + // for a block even if it doesn't get erased. This isn't totally + // awesome, but it allows us to use AssertingVH to prevent nasty + // dangling pointer issues within LazyValueInfo. + if (LVI) LVI->eraseBlock(BB); if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { Changed = true; // If we deleted BB and BB was the header of a loop, then the @@ -251,6 +278,17 @@ void JumpThreading::FindLoopHeaders(Function &F) { LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); } +// Helper method for ComputeValueKnownInPredecessors. If Value is a +// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing. 
+static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*, + BasicBlock*> > &Result, + Constant *Value, BasicBlock* BB){ + if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value)) + Result.push_back(std::make_pair(FoldedCInt, BB)); + else if (isa<UndefValue>(Value)) + Result.push_back(std::make_pair((ConstantInt*)0, BB)); +} + /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see /// if we can infer that the value is a known ConstantInt in any of our /// predecessors. If so, return the known list of value and pred BB in the /// bool JumpThreading:: ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ + // This method walks up use-def chains recursively. Because of this, we could + // get into an infinite loop going around loops in the use-def chain. To + // prevent this, keep track of what (value, block) pairs we've already visited + // and terminate the search if we loop back to them. + if (!RecursionSet.insert(std::make_pair(V, BB)).second) + return false; + + // An RAII helper to remove this pair from the recursion set once the recursion + // stack pops back out again. + RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB)); + // If V is a constantint, then it is known in all predecessors. if (isa<ConstantInt>(V) || isa<UndefValue>(V)) { ConstantInt *CI = dyn_cast<ConstantInt>(V); for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) Result.push_back(std::make_pair(CI, *PI)); + return true; } @@ -313,8 +363,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) { ConstantInt *CI = dyn_cast<ConstantInt>(InVal); Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i))); + } else if (LVI) { + Constant *CI = LVI->getConstantOnEdge(InVal, + PN->getIncomingBlock(i), BB); + // LVI returns null if no value could be determined. + if (!CI) continue; + PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i)); } } + return !Result.empty(); } @@ -338,29 +395,26 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ else InterestingVal = ConstantInt::getFalse(I->getContext()); + SmallPtrSet<BasicBlock*, 4> LHSKnownBBs; + // Scan for the sentinel. If we find an undef, force it to the // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { Result.push_back(LHSVals[i]); Result.back().first = InterestingVal; + LHSKnownBBs.insert(LHSVals[i].second); } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { // If we already inferred a value for this block on the LHS, don't // re-add it. - bool HasValue = false; - for (unsigned r = 0, e = Result.size(); r != e; ++r) - if (Result[r].second == RHSVals[i].second) { - HasValue = true; - break; - } - - if (!HasValue) { + if (!LHSKnownBBs.count(RHSVals[i].second)) { Result.push_back(RHSVals[i]); Result.back().first = InterestingVal; } } + return !Result.empty(); } @@ -377,8 +431,27 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ if (Result[i].first) Result[i].first = cast<ConstantInt>(ConstantExpr::getNot(Result[i].first)); + return true; } + + // Try to simplify some other binary operator values.
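The RecursionSet maintained above is kept balanced by a small RAII object, so each of this function's many early returns erases the (value, block) pair automatically. A standalone sketch of the idiom (simplified; the binary-operator case that the comment above introduces is one such recursive caller):

    // A second visit to the same (V, BB) pair means a use-def cycle: bail out.
    if (!RecursionSet.insert(std::make_pair(V, BB)).second)
      return false;
    // The remover's destructor runs on every return path below, so no
    // explicit erase call is needed before any of the early returns.
    RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));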
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; + ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals); + + // Try to use constant folding to simplify the binary operator. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first ? LHSVals[i].first : + cast<Constant>(UndefValue::get(BO->getType())); + Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); + + PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + } + } + + return !Result.empty(); } // Handle compare with phi operand, where the PHI is defined in this block. @@ -405,10 +478,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT); } - if (isa<UndefValue>(Res)) - Result.push_back(std::make_pair((ConstantInt*)0, PredBB)); - else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res)) - Result.push_back(std::make_pair(CI, PredBB)); + if (Constant *ConstRes = dyn_cast<Constant>(Res)) + PushConstantIntOrUndef(Result, ConstRes, PredBB); } return !Result.empty(); @@ -418,28 +489,59 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isIntegerTy() && // Not vector compare. - (!isa<Instruction>(Cmp->getOperand(0)) || - cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { - Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + Cmp->getType()->isIntegerTy()) { + if (!isa<Instruction>(Cmp->getOperand(0)) || + cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { + Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ + BasicBlock *P = *PI; + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + LazyValueInfo::Tristate Res = + LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + RHSCst, P, BB); + if (Res == LazyValueInfo::Unknown) + continue; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - // If the value is known by LazyValueInfo to be a constant in a - // predecessor, use that information to try to thread this block. - LazyValueInfo::Tristate - Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), - RHSCst, P, BB); - if (Res == LazyValueInfo::Unknown) - continue; + Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + } - Constant *ResC = ConstantInt::get(Cmp->getType(), Res); - Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + return !Result.empty(); } - - return !Result.empty(); + + // Try to find a constant value for the LHS of a comparison, + // and evaluate it statically if we can. + if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) { + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); + + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first ? 
LHSVals[i].first : + cast<Constant>(UndefValue::get(CmpConst->getType())); + Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), + V, CmpConst); + PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + } + + return !Result.empty(); + } + } + } + + if (LVI) { + // If all else fails, see if LVI can figure out a constant value for us. + Constant *CI = LVI->getConstant(V, BB); + ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI); + if (CInt) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(CInt, *PI)); } + + return !Result.empty(); } + return false; } @@ -490,6 +592,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + if (LVI) LVI->eraseBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); if (isEntry && BB != &BB->getParent()->getEntryBlock()) @@ -603,6 +706,44 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } } + + // For a comparison where the LHS is outside this block, it's possible + // that we've branched on it before. Use LVI to see if we can simplify + // the branch based on that. + BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); + Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE && + (!isa<Instruction>(CondCmp->getOperand(0)) || + cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) { + // For each predecessor edge, determine if the comparison is true or false + // on that edge. If they're all true or all false, we can simplify the + // branch. + // FIXME: We could handle mixed true/false by duplicating code. + LazyValueInfo::Tristate Baseline = + LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Baseline != LazyValueInfo::Unknown) { + // Check that all remaining incoming values match the first one. + while (++PI != PE) { + LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge( + CondCmp->getPredicate(), + CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Ret != Baseline) break; + } + + // If we terminated early, then one of the values didn't match. + if (PI == PE) { + unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0; + unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1; + RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD); + BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); + CondBr->eraseFromParent(); + return true; + } + } + } } // Check for some cases that are worth simplifying. Right now we want to look @@ -1020,6 +1161,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues; if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) return false; + assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); @@ -1314,6 +1456,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, << ", across block:\n " << *BB << "\n"); + if (LVI) + LVI->threadEdge(PredBB, BB, SuccBB); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB.
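The branch folding added above rests on LazyValueInfo's per-edge predicate query. A hedged sketch of the core call in isolation (operand names illustrative, not from the patch):

    #include "llvm/Analysis/LazyValueInfo.h"

    // Ask whether "V <pred> C" is known when control enters BB from Pred.
    // The answer is three-valued: True, False, or Unknown.
    LazyValueInfo::Tristate R =
      LVI->getPredicateOnEdge(CmpInst::ICMP_EQ, V, C, Pred, BB);
    if (R == LazyValueInfo::True) {
      // Provably true along this edge; the code above additionally requires
      // every predecessor edge to agree before rewriting the branch.
    }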
@@ -1383,7 +1528,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I); + SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); @@ -1538,7 +1683,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I); + SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index 7347395..2ef8544 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -26,8 +26,7 @@ // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. We then use -// the mem2reg functionality to construct the appropriate SSA form for the -// variable. +// the SSAUpdater to construct the appropriate SSA form for the value. // //===----------------------------------------------------------------------===// @@ -37,14 +36,15 @@ #include "llvm/DerivedTypes.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -66,7 +66,7 @@ DisablePromotion("disable-licm-promotion", cl::Hidden, namespace { struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid - LICM() : LoopPass(&ID) {} + LICM() : LoopPass(ID) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -75,39 +75,31 @@ namespace { /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg) + AU.addRequired<LoopInfo>(); + AU.addRequiredID(LoopSimplifyID); AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); - AU.addPreserved<DominanceFrontier>(); AU.addPreservedID(LoopSimplifyID); } bool doFinalization() { - // Free the values stored in the map - for (std::map<Loop *, AliasSetTracker *>::iterator - I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I) - delete I->second; - - LoopToAliasMap.clear(); + assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); return false; } private: - // Various analyses that we use... 
AliasAnalysis *AA; // Current AliasAnalysis information LoopInfo *LI; // Current LoopInfo - DominatorTree *DT; // Dominator Tree for the current Loop... - DominanceFrontier *DF; // Current Dominance Frontier + DominatorTree *DT; // Dominator Tree for the current Loop. - // State that is updated as we process loops + // State that is updated as we process loops. bool Changed; // Set to true when we change anything. BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... - std::map<Loop *, AliasSetTracker *> LoopToAliasMap; + DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); @@ -204,25 +196,12 @@ namespace { bool isLoopInvariantInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - /// PromoteValuesInLoop - Look at the stores in the loop and promote as many - /// to scalars as we can. - /// - void PromoteValuesInLoop(); - - /// FindPromotableValuesInLoop - Check the current loop for stores to - /// definite pointers, which are not loaded and stored through may aliases. - /// If these are found, create an alloca for the value, add it to the - /// PromotedValues list, and keep track of the mapping from value to - /// alloca... - /// - void FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &Val2AlMap); + void PromoteAliasSet(AliasSet &AS); }; } char LICM::ID = 0; -static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion"); +INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false); Pass *llvm::createLICMPass() { return new LICM(); } @@ -236,19 +215,23 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Get our Loop and Alias Analysis information... LI = &getAnalysis<LoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); - DF = &getAnalysis<DominanceFrontier>(); DT = &getAnalysis<DominatorTree>(); CurAST = new AliasSetTracker(*AA); - // Collect Alias info from subloops + // Collect Alias info from subloops. for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); LoopItr != LoopItrE; ++LoopItr) { Loop *InnerL = *LoopItr; - AliasSetTracker *InnerAST = LoopToAliasMap[InnerL]; - assert (InnerAST && "Where is my AST?"); + AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; + assert(InnerAST && "Where is my AST?"); // What if InnerLoop was modified by other passes ? CurAST->add(*InnerAST); + + // Once we've incorporated the inner loop's AST into ours, we don't need the + // subloop's anymore. + delete InnerAST; + LoopToAliasSetMap.erase(InnerL); } CurLoop = L; @@ -263,7 +246,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops... + if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block } @@ -283,15 +266,24 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { HoistRegion(DT->getNode(L->getHeader())); // Now that all loop invariants have been removed from the loop, promote any - // memory references to scalars that we can... - if (!DisablePromotion && Preheader && L->hasDedicatedExits()) - PromoteValuesInLoop(); - + // memory references to scalars that we can. 
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + // Loop over all of the alias sets in the tracker object. + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); + I != E; ++I) + PromoteAliasSet(*I); + } + // Clear out loop state information for the next iteration CurLoop = 0; Preheader = 0; - LoopToAliasMap[L] = CurAST; + // If this loop is nested inside of another one, save the alias information + // for when we process the outer loop. + if (L->getParentLoop()) + LoopToAliasSetMap[L] = CurAST; + else + delete CurAST; return Changed; } @@ -308,7 +300,7 @@ void LICM::SinkRegion(DomTreeNode *N) { // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) return; - // We are processing blocks in reverse dfo, so process children first... + // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) SinkRegion(Children[i]); @@ -319,6 +311,17 @@ void LICM::SinkRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { Instruction &I = *--II; + + // If the instruction is dead, we would try to sink it because it isn't used + // in the loop; instead, just delete it. + if (isInstructionTriviallyDead(&I)) { + DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); + ++II; + CurAST->deleteValue(&I); + I.eraseFromParent(); + Changed = true; + continue; + } // Check to see if we can sink this instruction to the exit blocks // of the loop. We can do this if all of the users of the instruction are @@ -350,6 +353,18 @@ void LICM::HoistRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { Instruction &I = *II++; + // Try constant folding this instruction. If all the operands are + // constants, it is technically hoistable, but it would be better to just + // fold it. + if (Constant *C = ConstantFoldInstruction(&I)) { + DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); + CurAST->copyValue(&I, C); + CurAST->deleteValue(&I); + I.replaceAllUsesWith(C); + I.eraseFromParent(); + continue; + } + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. @@ -357,7 +372,7 @@ void LICM::HoistRegion(DomTreeNode *N) { if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && isSafeToExecuteUnconditionally(I)) hoist(I); - } + } const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) @@ -457,10 +472,10 @@ bool LICM::isLoopInvariantInst(Instruction &I) { /// position, and may either delete it or move it to outside of the loop. /// void LICM::sink(Instruction &I) { - DEBUG(dbgs() << "LICM sinking instruction: " << I); + DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); + CurLoop->getUniqueExitBlocks(ExitBlocks); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -477,122 +492,101 @@ void LICM::sink(Instruction &I) { // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. - I.removeFromParent(); - BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); - ExitBlocks[0]->getInstList().insert(InsertPt, &I); + I.moveBefore(ExitBlocks[0]->getFirstNonPHI()); + + // This instruction is no longer in the AST for the current loop, because + // we just sunk it out of the loop. If we just sunk it into an outer + // loop, we will rediscover the operation when we process it. + CurAST->deleteValue(&I); } - } else if (ExitBlocks.empty()) { + return; + } + + if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); - } else { - // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to - // do all of the hard work of inserting PHI nodes as necessary. We convert - // the value into a stack object to get it to do this. - - // Firstly, we create a stack object to hold the value... - AllocaInst *AI = 0; - - if (!I.getType()->isVoidTy()) { - AI = new AllocaInst(I.getType(), 0, I.getName(), - I.getParent()->getParent()->getEntryBlock().begin()); - CurAST->add(AI); - } - - // Secondly, insert load instructions for each use of the instruction - // outside of the loop. - while (!I.use_empty()) { - Instruction *U = cast<Instruction>(I.use_back()); - - // If the user is a PHI Node, we actually have to insert load instructions - // in all predecessor blocks, not in the PHI block itself! - if (PHINode *UPN = dyn_cast<PHINode>(U)) { - // Only insert into each predecessor once, so that we don't have - // different incoming values from the same block! - std::map<BasicBlock*, Value*> InsertedBlocks; - for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i) - if (UPN->getIncomingValue(i) == &I) { - BasicBlock *Pred = UPN->getIncomingBlock(i); - Value *&PredVal = InsertedBlocks[Pred]; - if (!PredVal) { - // Insert a new load instruction right before the terminator in - // the predecessor block. - PredVal = new LoadInst(AI, "", Pred->getTerminator()); - CurAST->add(cast<LoadInst>(PredVal)); - } - - UPN->setIncomingValue(i, PredVal); - } - - } else { - LoadInst *L = new LoadInst(AI, "", U); - U->replaceUsesOfWith(&I, L); - CurAST->add(L); - } - } - - // Thirdly, insert a copy of the instruction in each exit block of the loop - // that is dominated by the instruction, storing the result into the memory - // location. Be careful not to insert the instruction into any particular - // basic block more than once. - std::set<BasicBlock*> InsertedBlocks; - BasicBlock *InstOrigBB = I.getParent(); - - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *ExitBlock = ExitBlocks[i]; - - if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) { - // If we haven't already processed this exit block, do so now. - if (InsertedBlocks.insert(ExitBlock).second) { - // Insert the code after the last PHI node... 
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); - - // If this is the first exit block processed, just move the original - // instruction, otherwise clone the original instruction and insert - // the copy. - Instruction *New; - if (InsertedBlocks.size() == 1) { - I.removeFromParent(); - ExitBlock->getInstList().insert(InsertPt, &I); - New = &I; - } else { - New = I.clone(); - CurAST->copyValue(&I, New); - if (!I.getName().empty()) - New->setName(I.getName()+".le"); - ExitBlock->getInstList().insert(InsertPt, New); - } - - // Now that we have inserted the instruction, store it into the alloca - if (AI) new StoreInst(New, AI, InsertPt); - } - } - } - - // If the instruction doesn't dominate any exit blocks, it must be dead. - if (InsertedBlocks.empty()) { - CurAST->deleteValue(&I); - I.eraseFromParent(); - } - - // Finally, promote the fine value to SSA form. - if (AI) { - std::vector<AllocaInst*> Allocas; - Allocas.push_back(AI); - PromoteMemToReg(Allocas, *DT, *DF, CurAST); + return; + } + + // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the + // hard work of inserting PHI nodes as necessary. + SmallVector<PHINode*, 8> NewPHIs; + SSAUpdater SSA(&NewPHIs); + + if (!I.use_empty()) + SSA.Initialize(I.getType(), I.getName()); + + // Insert a copy of the instruction in each exit block of the loop that is + // dominated by the instruction. Each exit block is known to only be in the + // ExitBlocks list once. + BasicBlock *InstOrigBB = I.getParent(); + unsigned NumInserted = 0; + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + + if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) + continue; + + // Insert the code after the last PHI node. + BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); + + // If this is the first exit block processed, just move the original + // instruction, otherwise clone the original instruction and insert + // the copy. + Instruction *New; + if (NumInserted++ == 0) { + I.moveBefore(InsertPt); + New = &I; + } else { + New = I.clone(); + if (!I.getName().empty()) + New->setName(I.getName()+".le"); + ExitBlock->getInstList().insert(InsertPt, New); } + + // Now that we have inserted the instruction, inform SSAUpdater. + if (!I.use_empty()) + SSA.AddAvailableValue(ExitBlock, New); } + + // If the instruction doesn't dominate any exit blocks, it must be dead. + if (NumInserted == 0) { + CurAST->deleteValue(&I); + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.eraseFromParent(); + return; + } + + // Next, rewrite uses of the instruction, inserting PHI nodes as needed. + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { + // Grab the use before incrementing the iterator. + Use &U = UI.getUse(); + // Increment the iterator before removing the use from the list. + ++UI; + SSA.RewriteUseAfterInsertions(U); + } + + // Update CurAST for NewPHIs if I had pointer type. + if (I.getType()->isPointerTy()) + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(&I, NewPHIs[i]); + + // Finally, remove the instruction from CurAST. It is no longer in the loop. + CurAST->deleteValue(&I); } /// hoist - When an instruction is found to only use loop invariant operands @@ -602,12 +596,8 @@ void LICM::hoist(Instruction &I) { DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); - // Remove the instruction from its current basic block... but don't delete the - // instruction. 
- I.removeFromParent(); - - // Insert the new node in Preheader, before the terminator. - Preheader->getInstList().insert(Preheader->getTerminator(), &I); + // Move the new node to the Preheader, before its terminator. + I.moveBefore(Preheader->getTerminator()); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -647,223 +637,269 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { return true; } - -/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking +/// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers -/// which are loop invariant. We promote these memory locations to use allocas -/// instead. These allocas can easily be raised to register values by the -/// PromoteMem2Reg functionality. +/// which are loop invariant. /// -void LICM::PromoteValuesInLoop() { - // PromotedValues - List of values that are promoted out of the loop. Each - // value has an alloca instruction for it, and a canonical version of the - // pointer. - std::vector<std::pair<AllocaInst*, Value*> > PromotedValues; - std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca - - FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); - if (ValueToAllocaMap.empty()) return; // If there are values to promote. - - Changed = true; - NumPromoted += PromotedValues.size(); - - std::vector<Value*> PointerValueNumbers; - - // Emit a copy from the value into the alloca'd value in the loop preheader - TerminatorInst *LoopPredInst = Preheader->getTerminator(); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - Value *Ptr = PromotedValues[i].second; - - // If we are promoting a pointer value, update alias information for the - // inserted load. - Value *LoadValue = 0; - if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) { - // Locate a load or store through the pointer, and assign the same value - // to LI as we are loading or storing. Since we know that the value is - // stored in this loop, this will always succeed. - for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - LoadValue = LI; - break; - } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(1) == Ptr) { - LoadValue = SI->getOperand(0); - break; - } - } - } - assert(LoadValue && "No store through the pointer found!"); - PointerValueNumbers.push_back(LoadValue); // Remember this for later. - } - - // Load from the memory we are promoting. - LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst); - - if (LoadValue) CurAST->copyValue(LoadValue, LI); - - // Store into the temporary alloca. - new StoreInst(LI, PromotedValues[i].first, LoopPredInst); - } +void LICM::PromoteAliasSet(AliasSet &AS) { + // We can promote this alias set if it has a store, if it is a "Must" alias + // set, if the pointer is loop invariant, and if we are not eliminating any + // volatile loads or stores. 
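To make the promotion concrete, a source-level sketch of the safe case (illustrative C++, not taken from the patch):

    // Before: every iteration loads and stores through P.
    for (int i = 0; i != n; ++i)
      *P += i;

    // After promotion: one load on entry, one store on exit.
    int tmp = *P;
    for (int i = 0; i != n; ++i)
      tmp += i;
    *P = tmp;

The guard below rejects any alias set that fails the conditions listed in the comment above.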
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) + return; + + assert(!AS.empty() && + "Must alias set should have at least one pointer element in it!"); + Value *SomePtr = AS.begin()->getValue(); - // Scan the basic blocks in the loop, replacing uses of our pointers with - // uses of the allocas in question. + // It isn't safe to promote a load/store from the loop if the load/store is + // conditional. For example, turning: // - for (Loop::block_iterator I = CurLoop->block_begin(), - E = CurLoop->block_end(); I != E; ++I) { - BasicBlock *BB = *I; - // Rewrite all loads and stores in the block of the pointer... - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (LoadInst *L = dyn_cast<LoadInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(L->getOperand(0)); - if (I != ValueToAllocaMap.end()) - L->setOperand(0, I->second); // Rewrite load instruction... - } else if (StoreInst *S = dyn_cast<StoreInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(S->getOperand(1)); - if (I != ValueToAllocaMap.end()) - S->setOperand(1, I->second); // Rewrite store instruction... - } - } - } - - // Now that the body of the loop uses the allocas instead of the original - // memory locations, insert code to copy the alloca value back into the - // original memory location on all exits from the loop. Note that we only - // want to insert one copy of the code in each exit block, though the loop may - // exit to the same block more than once. + // for () { if (c) *P += 1; } // - SmallPtrSet<BasicBlock*, 16> ProcessedBlocks; - - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - if (!ProcessedBlocks.insert(ExitBlocks[i])) - continue; - - // Copy all of the allocas into their memory locations. - BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI(); - Instruction *InsertPos = BI; - unsigned PVN = 0; - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - // Load from the alloca. - LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); - - // If this is a pointer type, update alias info appropriately. - if (LI->getType()->isPointerTy()) - CurAST->copyValue(PointerValueNumbers[PVN++], LI); - - // Store into the memory we promoted. - new StoreInst(LI, PromotedValues[i].second, InsertPos); - } - } - - // Now that we have done the deed, use the mem2reg functionality to promote - // all of the new allocas we just created into real SSA registers. + // into: // - std::vector<AllocaInst*> PromotedAllocas; - PromotedAllocas.reserve(PromotedValues.size()); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) - PromotedAllocas.push_back(PromotedValues[i].first); - PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); -} - -/// FindPromotableValuesInLoop - Check the current loop for stores to definite -/// pointers, which are not loaded and stored through may aliases and are safe -/// for promotion. If these are found, create an alloca for the value, add it -/// to the PromotedValues list, and keep track of the mapping from value to -/// alloca. 
-void LICM::FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &ValueToAllocaMap) { - Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin(); - - // Loop over all of the alias sets in the tracker object. - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) { - AliasSet &AS = *I; - // We can promote this alias set if it has a store, if it is a "Must" alias - // set, if the pointer is loop invariant, and if we are not eliminating any - // volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) - continue; + // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; + // + // is not safe, because *P may only be valid to access if 'c' is true. + // + // It is safe to promote P if all uses are direct load/stores and if at + // least one is guaranteed to be executed. + bool GuaranteedToExecute = false; + + SmallVector<Instruction*, 64> LoopUses; + SmallPtrSet<Value*, 4> PointerMustAliases; + + // Check that all of the pointers in the alias set have the same type. We + // cannot (yet) promote a memory location that is loaded and stored in + // different sizes. + for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { + Value *ASIV = ASI->getValue(); + PointerMustAliases.insert(ASIV); - assert(!AS.empty() && - "Must alias set should have at least one pointer element in it!"); - Value *V = AS.begin()->getValue(); - // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. - { - bool PointerOk = true; - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - if (V->getType() != I->getValue()->getType()) { - PointerOk = false; - break; - } - if (!PointerOk) - continue; - } - - // It isn't safe to promote a load/store from the loop if the load/store is - // conditional. For example, turning: - // - // for () { if (c) *P += 1; } - // - // into: - // - // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; - // - // is not safe, because *P may only be valid to access if 'c' is true. - // - // It is safe to promote P if all uses are direct load/stores and if at - // least one is guaranteed to be executed. - bool GuaranteedToExecute = false; - bool InvalidInst = false; - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + if (SomePtr->getType() != ASIV->getType()) + return; + + for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { - // Ignore instructions not in this loop. + // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; - - if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) { - InvalidInst = true; - break; - } + + // If there is a non-load/store instruction in the loop, we can't promote + // it. + if (isa<LoadInst>(Use)) + assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); + else if (isa<StoreInst>(Use)) { + assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); + if (Use->getOperand(0) == ASIV) return; + } else + return; // Not a load or store. if (!GuaranteedToExecute) GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); + + LoopUses.push_back(Use); } } + + // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute) + return; + + // Otherwise, this is safe to promote, let's do it! + DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); + Changed = true; + ++NumPromoted; - + // We use the SSAUpdater interface to insert phi nodes as required. + SmallVector<PHINode*, 16> NewPHIs; + SSAUpdater SSA(&NewPHIs); + + // It wants to know some value of the same type as what we'll be inserting. + Value *SomeValue; + if (isa<LoadInst>(LoopUses[0])) + SomeValue = LoopUses[0]; + else + SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0); + SSA.Initialize(SomeValue->getType(), SomeValue->getName()); + + // First step: bucket up uses of the pointers by the block they occur in. + // This is important because we have to handle multiple defs/uses in a block + // ourselves: SSAUpdater is purely for cross-block references. + // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element. + DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; + for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { + Instruction *User = LoopUses[i]; + UsesByBlock[User->getParent()].push_back(User); + } + + // Okay, now we can iterate over all the blocks in the loop with uses, + // processing them. Keep track of which loads are loading a live-in value. + SmallVector<LoadInst*, 32> LiveInLoads; + DenseMap<Value*, Value*> ReplacedLoads; + + for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) { + Instruction *User = LoopUses[LoopUse]; + std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()]; + + // If this block has already been processed, ignore this repeat use. + if (BlockUses.empty()) continue; + + // Okay, this is the first use in the block. If this block just has a + // single user in it, we can rewrite it trivially. + if (BlockUses.size() == 1) { + // If it is a store, it is a trivial def of the value in the block. + if (isa<StoreInst>(User)) { + SSA.AddAvailableValue(User->getParent(), + cast<StoreInst>(User)->getOperand(0)); + } else { + // Otherwise it is a load, queue it to rewrite as a live-in load. + LiveInLoads.push_back(cast<LoadInst>(User)); + } + BlockUses.clear(); continue; + } - const Type *Ty = cast<PointerType>(V->getType())->getElementType(); - AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart); - PromotedValues.push_back(std::make_pair(AI, V)); + // Otherwise, check to see if this block is all loads. If so, we can queue + // them all as live-in loads. + bool HasStore = false; + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { + if (isa<StoreInst>(BlockUses[i])) { + HasStore = true; + break; + } + } + + if (!HasStore) { + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) + LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); + BlockUses.clear(); + continue; + } - // Update the AST and alias analysis. - CurAST->copyValue(V, AI); + // Otherwise, we have mixed loads and stores (or just a bunch of stores). + // Since SSAUpdater is purely for cross-block values, we need to determine + // the order of these instructions in the block. If the first use in the + // block is a load, then it uses the live-in value. The last store defines + // the live-out value. We handle this by doing a linear scan of the block.
+ BasicBlock *BB = User->getParent(); + Value *StoredValue = 0; + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (LoadInst *L = dyn_cast<LoadInst>(II)) { + // If this is a load from an unrelated pointer, ignore it. + if (!PointerMustAliases.count(L->getOperand(0))) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *S = dyn_cast<StoreInst>(II)) { + // If this is a store to an unrelated pointer, ignore it. + if (!PointerMustAliases.count(S->getOperand(1))) continue; - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); + // Remember that this is the active value in the block. + StoredValue = S->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Now that all the intra-loop values are classified, set up the preheader. + // It gets a load of the pointer we're promoting, and it is the live-out value + // from the preheader. + LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", + Preheader->getTerminator()); + SSA.AddAvailableValue(Preheader, PreheaderLoad); + + // Now that the preheader is good to go, set up the exit blocks. Each exit + // block gets a store of the live-out values that feed them. Since we've + // already told the SSA updater about the defs in the loop and the preheader + // definition, it is all set and we can start using it. + SmallVector<BasicBlock*, 8> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Instruction *InsertPos = ExitBlock->getFirstNonPHI(); + new StoreInst(LiveInValue, SomePtr, InsertPos); + } - DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n"); + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { + LoadInst *ALoad = LiveInLoads[i]; + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + ALoad->replaceAllUsesWith(NewVal); + CurAST->copyValue(ALoad, NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // If the preheader load is itself a pointer, we need to tell alias analysis + // about the new pointer we created in the preheader block and about any PHI + // nodes that just got inserted. + if (PreheaderLoad->getType()->isPointerTy()) { + // Copy any value stored to or loaded from a must-alias of the pointer. + CurAST->copyValue(SomeValue, PreheaderLoad); + + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(SomeValue, NewPHIs[i]); } + + // Now that everything is rewritten, delete the old instructions from the body + // of the loop. They should all be dead now. + for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { + Instruction *User = LoopUses[i]; + + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdate data structure for a block (e.g. because + // the loaded value was stored later). 
In this case, we need to recursively + // propagate the updates until we get to the real value. + if (!User->use_empty()) { + Value *NewVal = ReplacedLoads[User]; + assert(NewVal && "not a replaced load?"); + + // Propagate down to the ultimate replacee. The intermediate loads + // could theoretically already have been deleted, so we don't want to + // dereference the Value*'s. + DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); + while (RLI != ReplacedLoads.end()) { + NewVal = RLI->second; + RLI = ReplacedLoads.find(NewVal); + } + + User->replaceAllUsesWith(NewVal); + CurAST->copyValue(User, NewVal); + } + + CurAST->deleteValue(User); + User->eraseFromParent(); + } + + // Phew, we're done! } + /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { - AliasSetTracker *AST = LoopToAliasMap[L]; + AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); if (!AST) return; @@ -873,7 +909,7 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias /// set. void LICM::deleteAnalysisValue(Value *V, Loop *L) { - AliasSetTracker *AST = LoopToAliasMap[L]; + AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); if (!AST) return; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index e4894e9..543dfc1 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -28,7 +28,7 @@ namespace { class LoopDeletion : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopDeletion() : LoopPass(&ID) {} + LoopDeletion() : LoopPass(ID) {} // Possibly eliminate loop L if it is dead. bool runOnLoop(Loop* L, LPPassManager& LPM); @@ -38,9 +38,9 @@ namespace { bool &Changed, BasicBlock *Preheader); virtual void getAnalysisUsage(AnalysisUsage& AU) const { - AU.addRequired<ScalarEvolution>(); AU.addRequired<DominatorTree>(); AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); @@ -55,7 +55,8 @@ namespace { } char LoopDeletion::ID = 0; -static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops"); +INITIALIZE_PASS(LoopDeletion, "loop-deletion", + "Delete dead loops", false, false); Pass* llvm::createLoopDeletionPass() { return new LoopDeletion(); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp index 31058e5..a433674 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -74,7 +74,7 @@ namespace { class LoopIndexSplit : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopIndexSplit() : LoopPass(&ID) {} + LoopIndexSplit() : LoopPass(ID) {} // Index split Loop L. Return true if loop is split.
bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -197,8 +197,8 @@ } char LoopIndexSplit::ID = 0; -static RegisterPass<LoopIndexSplit> -X("loop-index-split", "Index Split Loops"); +INITIALIZE_PASS(LoopIndexSplit, "loop-index-split", + "Index Split Loops", false, false); Pass *llvm::createLoopIndexSplitPass() { return new LoopIndexSplit(); @@ -677,7 +677,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB); PI != PE; ++PI) { BasicBlock *P = *PI; - if (P == DeadBB || DT->dominates(DeadBB, P)) + if (DT->dominates(DeadBB, P)) PredBlocks.push_back(P); } @@ -799,7 +799,7 @@ void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB, // the dominance frontiers. for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end(); I != E; ++I) { - if (*I == CondBB || !DT->dominates(CondBB, *I)) continue; + if (!DT->properlyDominates(CondBB, *I)) continue; DominanceFrontier::iterator BBDF = DF->find(*I); DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin(); DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end(); @@ -1183,7 +1183,7 @@ bool LoopIndexSplit::cleanBlock(BasicBlock *BB) { bool usedOutsideBB = false; for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) { - Instruction *U = cast<Instruction>(UI); + Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB) usedOutsideBB = true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 16c4a15..65acc1d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -35,7 +35,7 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(&ID) {} + LoopRotate() : LoopPass(ID) {} // Rotate Loop L as many times as possible. Return true if // loop is rotated at least once. @@ -43,15 +43,15 @@ namespace { // LCSSA form makes instruction renaming easier. virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + AU.addPreserved<DominanceFrontier>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } // Helper functions @@ -79,7 +79,7 @@ namespace { } char LoopRotate::ID = 0; -static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops"); +INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false); Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } @@ -221,7 +221,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // The value now exists in two versions: the initial value in the preheader // and the loop "next" value in the original header. - SSA.Initialize(OrigHeaderVal); + SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal); @@ -261,6 +261,26 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // NewHeader is now the header of the loop.
L->moveToHeader(NewHeader); + // Move the original header to the bottom of the loop, where it now more + // naturally belongs. This isn't necessary for correctness, and CodeGen can + // usually reorder blocks on its own to fix things like this up, but it's + // still nice to keep the IR readable. + // + // The original header should have only one predecessor at this point, since + // we checked that the loop had a proper preheader and unique backedge before + // we started. + assert(OrigHeader->getSinglePredecessor() && + "Original loop header has too many predecessors after loop rotation!"); + OrigHeader->moveAfter(OrigHeader->getSinglePredecessor()); + + // Also, since this original header only has one predecessor, zap its + // PHI nodes, which are now trivial. + FoldSingleEntryPHINodes(OrigHeader); + + // TODO: We could just go ahead and merge OrigHeader into its predecessor + // at this point, if we don't mind updating dominator info. + + // Establish a new preheader, update dominators, etc. preserveCanonicalLoopForm(LPM); ++NumRotated; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 1f9b415..e8dc5d3 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -161,9 +161,10 @@ RegUseTracker::DropUse(size_t LUIdx) { bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUsesMap.count(Reg)) return false; - const SmallBitVector &UsedByIndices = - RegUsesMap.find(Reg)->second.UsedByIndices; + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + if (I == RegUsesMap.end()) + return false; + const SmallBitVector &UsedByIndices = I->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -441,12 +442,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. 
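For the getExactSDiv hunk that follows: an exact signed division distributes over an add-recurrence, so {Start,+,Step} sdiv R becomes {Start sdiv R,+,Step sdiv R} when both component divisions are exact and the addrec does not overflow. A tiny numeric check of that identity (plain C++, illustrative only):

    #include <cassert>
    int main() {
      for (int i = 0; i != 4; ++i) {
        int rec = 8 + 4 * i;    // the addrec {8,+,4}: 8, 12, 16, 20
        int div = 4 + 2 * i;    // the addrec {4,+,2}: 4,  6,  8, 10
        assert(rec / 2 == div); // {8,+,4} sdiv 2 == {4,+,2}
      }
      return 0;
    }

If either division is inexact, getExactSDiv gives up and returns null.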
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { - const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, - IgnoreSignificantBits); - if (!Start) return 0; const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); if (!Step) return 0; + const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; return SE.getAddRecExpr(Start, Step, AR->getLoop()); } return 0; @@ -505,12 +506,14 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddExpr(NewOps); + if (Result != 0) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result != 0) + S = SE.getAddRecExpr(NewOps, AR->getLoop()); return Result; } return 0; @@ -528,12 +531,14 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); - S = SE.getAddExpr(NewOps); + if (Result) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result) + S = SE.getAddRecExpr(NewOps, AR->getLoop()); return Result; } return 0; @@ -965,6 +970,12 @@ public: /// may be used. bool AllFixupsOutsideLoop; + /// WidestFixupType - This records the widest use type for any fixup using + /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different + /// max fixup widths to be equivalent, because the narrower one may be relying + /// on the implicit truncation to truncate away bogus bits. + const Type *WidestFixupType; + /// Formulae - A list of ways to build a value that can satisfy this user. /// After the list is populated, one of these is selected heuristically and /// used to formulate a replacement for OperandValToReplace in UserInst. 
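Why the new WidestFixupType field must match before FindUseWithSimilarFormula may merge two uses: a narrower fixup can depend on implicit truncation to strip high bits that are garbage for it. A small illustration of the hazard (plain C++, widths illustrative):

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t Wide = 0x1FF;          // value computed for a 32-bit fixup
      uint8_t Narrow = (uint8_t)Wide; // an 8-bit fixup keeps only 0xFF
      assert(Narrow == 0xFF);         // the truncation strips the "bogus bits"
      // Treating the two fixups as interchangeable would lose that implicit
      // truncation and feed the high bits to the narrow user.
      return 0;
    }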
@@ -976,15 +987,14 @@ public: LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true) {} + AllFixupsOutsideLoop(true), + WidestFixupType(0) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); void DeleteFormula(Formula &F); void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); - void check() const; - void print(raw_ostream &OS) const; void dump() const; }; @@ -1076,13 +1086,16 @@ void LSRUse::print(raw_ostream &OS) const { for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), E = Offsets.end(); I != E; ++I) { OS << *I; - if (next(I) != E) + if (llvm::next(I) != E) OS << ','; } OS << '}'; if (AllFixupsOutsideLoop) OS << ", all-fixups-outside-loop"; + + if (WidestFixupType) + OS << ", widest fixup type: " << *WidestFixupType; } void LSRUse::dump() const { @@ -1354,6 +1367,10 @@ public: void FilterOutUndesirableDedicatedRegisters(); size_t EstimateSearchSpaceComplexity() const; + void NarrowSearchSpaceByDetectingSupersets(); + void NarrowSearchSpaceByCollapsingUnrolledCode(); + void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, @@ -1587,7 +1604,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); // Add one to the backedge-taken count to get the trip count. - const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount); if (IterationCount != SE.getSCEV(Sel)) return Cond; // Check for a max calculation that matches the pattern. There's no check @@ -1919,32 +1936,41 @@ void LSRInstance::DeleteUse(LSRUse &LU) { LSRUse * LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, const LSRUse &OrigLU) { - // Search all uses for the formula. This could be more clever. Ignore - // ICmpZero uses because they may contain formulae generated by - // GenerateICmpZeroScales, in which case adding fixup offsets may - // be invalid. + // Search all uses for the formula. This could be more clever. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; + // Check whether this use is close enough to OrigLU, to see whether it's + // worthwhile looking through its formulae. + // Ignore ICmpZero uses because they may contain formulae generated by + // GenerateICmpZeroScales, in which case adding fixup offsets may + // be invalid. if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero && LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && + LU.WidestFixupType == OrigLU.WidestFixupType && LU.HasFormulaWithSameRegs(OrigF)) { + // Scan through this use's formulae. for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; + // Check to see if this formula has the same registers and symbols + // as OrigF. if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale && - LU.Kind) { + F.AM.Scale == OrigF.AM.Scale) { if (F.AM.BaseOffs == 0) return &LU; + // This is the formula where all the registers and symbols matched; + // there aren't going to be any others. Since we declined it, we + // can skip the rest of the formulae and proceed to the next LSRUse.
break; } } } } + // Nothing looked good. return 0; } @@ -1976,7 +2002,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { for (SmallSetVector<const SCEV *, 4>::const_iterator I = Strides.begin(), E = Strides.end(); I != E; ++I) for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = - next(I); NewStrideIter != E; ++NewStrideIter) { + llvm::next(I); NewStrideIter != E; ++NewStrideIter) { const SCEV *OldStride = *I; const SCEV *NewStride = *NewStrideIter; @@ -2066,6 +2092,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || + SE.getTypeSizeInBits(LU.WidestFixupType) < + SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) + LU.WidestFixupType = LF.OperandValToReplace->getType(); // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { @@ -2195,6 +2225,10 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || + SE.getTypeSizeInBits(LU.WidestFixupType) < + SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) + LU.WidestFixupType = LF.OperandValToReplace->getType(); InsertSupplementalFormula(U, LU, LF.LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; @@ -2207,14 +2241,13 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// separate registers. If C is non-null, multiply each subexpression by C. static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl<const SCEV *> &Ops, - SmallVectorImpl<const SCEV *> &UninterestingOps, const Loop *L, ScalarEvolution &SE) { if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE); + CollectSubexprs(*I, C, Ops, L, SE); return; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. @@ -2222,8 +2255,8 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), - C, Ops, UninterestingOps, L, SE); - CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE); + C, Ops, L, SE); + CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; } } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { @@ -2233,17 +2266,13 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, dyn_cast<SCEVConstant>(Mul->getOperand(0))) { CollectSubexprs(Mul->getOperand(1), C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, - Ops, UninterestingOps, L, SE); + Ops, L, SE); return; } } - // Otherwise use the value itself. Loop-variant "unknown" values are - // uninteresting; we won't be able to do anything meaningful with them. - if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L)) - UninterestingOps.push_back(S); - else - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + // Otherwise use the value itself, optionally with a scale applied. + Ops.push_back(C ? 
SE.getMulExpr(C, S) : S); } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -2257,19 +2286,19 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *BaseReg = Base.BaseRegs[i]; - SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps; - CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE); - - // Add any uninteresting values as one register, as we won't be able to - // form any interesting reassociation opportunities with them. They'll - // just have to be added inside the loop no matter what we do. - if (!UninterestingAddOps.empty()) - AddOps.push_back(SE.getAddExpr(UninterestingAddOps)); + SmallVector<const SCEV *, 8> AddOps; + CollectSubexprs(BaseReg, 0, AddOps, L, SE); if (AddOps.size() == 1) continue; for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), JE = AddOps.end(); J != JE; ++J) { + + // Loop-variant "unknown" values are uninteresting; we won't be able to + // do anything meaningful with them. + if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L)) + continue; + // Don't pull a constant into a register if the constant could be folded // into an immediate field. if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, @@ -2279,9 +2308,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Collect all operands except *J. SmallVector<const SCEV *, 8> InnerAddOps - ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); + (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); InnerAddOps.append - (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end()); + (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end()); // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. @@ -2377,7 +2406,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { // Add the offset to the base register. - const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. if (NewG->isZero()) { std::swap(F.BaseRegs[i], F.BaseRegs.back()); @@ -2778,6 +2807,10 @@ LSRInstance::GenerateAllReuseFormulae() { } GenerateCrossUseConstantOffsets(); + + DEBUG(dbgs() << "\n" + "After generating reuse formulae:\n"; + print_uses(dbgs())); } /// If there are multiple formulae with the same set of registers used @@ -2876,11 +2909,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const { return Power; } -/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of -/// formulae to choose from, use some rough heuristics to prune down the number -/// of formulae. This keeps the main solver from taking an extraordinary amount -/// of time in some worst-case scenarios. -void LSRInstance::NarrowSearchSpaceUsingHeuristics() { +/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset +/// of the registers of another formula, it won't help reduce register +/// pressure (though it may not necessarily hurt register pressure); remove +/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2938,7 +2971,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} +/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers +/// for expressions like A, A+1, A+2, etc., allocate a single register for +/// them. +void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2988,7 +3026,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { if (Fixup.LUIdx == LUIdx) { Fixup.LUIdx = LUThatHas - &Uses.front(); Fixup.Offset += F.AM.BaseOffs; - DEBUG(errs() << "New fixup has offset " + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } if (Fixup.LUIdx == NumUses-1) @@ -3009,7 +3047,30 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} + +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that +/// we've done more filtering, as it may be able to find more formulae to +/// eliminate. +void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by re-filtering out " + "undesirable dedicated registers.\n"); + + FilterOutUndesirableDedicatedRegisters(); + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } +} +/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely +/// to be profitable, and then in any use which has any reference to that +/// register, delete all formulae which do not reference that register. +void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // With all other options exhausted, loop until the system is simple // enough to handle. SmallPtrSet<const SCEV *, 4> Taken; @@ -3071,6 +3132,17 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { } } +/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of +/// formulae to choose from, use some rough heuristics to prune down the number +/// of formulae. This keeps the main solver from taking an extraordinary amount +/// of time in some worst-case scenarios. +void LSRInstance::NarrowSearchSpaceUsingHeuristics() { + NarrowSearchSpaceByDetectingSupersets(); + NarrowSearchSpaceByCollapsingUnrolledCode(); + NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + NarrowSearchSpaceByPickingWinnerRegs(); +} + /// SolveRecurse - This is the recursive solver. void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, Cost &SolutionCost, @@ -3614,10 +3686,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // to formulate the values needed for the uses. 
GenerateAllReuseFormulae(); - DEBUG(dbgs() << "\n" - "After generating reuse formulae:\n"; - print_uses(dbgs())); - FilterOutUndesirableDedicatedRegisters(); NarrowSearchSpaceUsingHeuristics(); @@ -3724,15 +3792,15 @@ private: } char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false); Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(&ID), TLI(tli) {} + : LoopPass(ID), TLI(tli) {} void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 4ad41ae..d0edfa2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -26,7 +27,7 @@ using namespace llvm; static cl::opt<unsigned> -UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden, +UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden, cl::desc("The cut-off point for automatic loop unrolling")); static cl::opt<unsigned> @@ -42,7 +43,7 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(&ID) {} + LoopUnroll() : LoopPass(ID) {} /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much @@ -55,23 +56,24 @@ namespace { /// loop preheaders be inserted into the CFG... /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired<LoopInfo>(); AU.addPreservedID(LCSSAID); - AU.addPreserved<LoopInfo>(); + AU.addPreserved<ScalarEvolution>(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. // For now, recreate dom info, if loop is unrolled. AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } }; } char LoopUnroll::ID = 0; -static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops"); +INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false); Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } @@ -145,12 +147,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; // FIXME: Reconstruct dom info, because it is not preserved properly. 
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - if (DT) { + if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) DT->runOnFunction(*F); - DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>(); - if (DF) - DF->runOnFunction(*F); - } return true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 0c900ff..9afe428 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -77,7 +77,6 @@ namespace { bool redoLoop; Loop *currentLoop; - DominanceFrontier *DF; DominatorTree *DT; BasicBlock *loopHeader; BasicBlock *loopPreheader; @@ -92,15 +91,15 @@ namespace { public: static char ID; // Pass ID, replacement for typeid explicit LoopUnswitch(bool Os = false) : - LoopPass(&ID), OptimizeForSize(Os), redoLoop(false), - currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL), + LoopPass(ID), OptimizeForSize(Os), redoLoop(false), + currentLoop(NULL), DT(NULL), loopHeader(NULL), loopPreheader(NULL) {} bool runOnLoop(Loop *L, LPPassManager &LPM); bool processCurrentLoop(); /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... + /// loop preheaders be inserted into the CFG. /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); @@ -110,7 +109,6 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } private: @@ -160,7 +158,7 @@ namespace { }; } char LoopUnswitch::ID = 0; -static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops"); +INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false); Pass *llvm::createLoopUnswitchPass(bool Os) { return new LoopUnswitch(Os); @@ -201,7 +199,6 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { LI = &getAnalysis<LoopInfo>(); LPM = &LPM_Ref; - DF = getAnalysisIfAvailable<DominanceFrontier>(); DT = getAnalysisIfAvailable<DominatorTree>(); currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); @@ -216,8 +213,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { // FIXME: Reconstruct dom info, because it is not preserved properly. if (DT) DT->runOnFunction(*F); - if (DF) - DF->runOnFunction(*F); } return Changed; } @@ -282,19 +277,18 @@ bool LoopUnswitch::processCurrentLoop() { return Changed; } -/// isTrivialLoopExitBlock - Check to see if all paths from BB either: -/// 1. Exit the loop with no side effects. -/// 2. Branch to the latch block with no side-effects. +/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the +/// loop with no side effects (including infinite loops). /// -/// If these conditions are true, we return true and set ExitBB to the block we +/// If so, we return true and set ExitBB to the block we /// exit through. /// static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, BasicBlock *&ExitBB, std::set<BasicBlock*> &Visited) { if (!Visited.insert(BB).second) { - // Already visited and Ok, end of recursion. - return true; + // Already visited. Without more analysis, this could indicate an infinite loop. + return false; } else if (!L->contains(BB)) { // Otherwise, this is a loop exit, this is fine so long as this is the // first exit.
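For orientation on the unswitching these hunks refine: a "trivial" unswitch hoists a loop-invariant test whose taken side exits the loop with no side effects, so the loop body need not be duplicated. A hypothetical source-level before/after in C++ (the pass itself operates on IR; names are invented):

// Before: the loop-invariant Flag is re-tested on every iteration.
int SumBefore(const int *A, int N, bool Flag) {
  int S = 0;
  for (int i = 0; i < N; ++i) {
    if (Flag) return S; // trivially exits the loop, no side effects
    S += A[i];
  }
  return S;
}

// After unswitching on Flag: the test runs once, outside the loop.
int SumAfter(const int *A, int N, bool Flag) {
  if (Flag) return 0;
  int S = 0;
  for (int i = 0; i < N; ++i)
    S += A[i];
  return S;
}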
@@ -324,7 +318,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, /// process. If so, return the block that is exited to, otherwise return null. static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set<BasicBlock*> Visited; - Visited.insert(L->getHeader()); // Branches to header are ok. + Visited.insert(L->getHeader()); // Branches to header make infinite loops. BasicBlock *ExitBB = 0; if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) return ExitBB; @@ -356,8 +350,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, if (!BI->isConditional() || BI->getCondition() != Cond) return false; - // Check to see if a successor of the branch is guaranteed to go to the - // latch block or exit through a one exit block without having any + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any // side-effects. If so, determine the value of Cond that causes it to do // this. if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp new file mode 100644 index 0000000..973ffe7 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp @@ -0,0 +1,161 @@ +//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers atomic intrinsics to non-atomic form for use in a known +// non-preemptible environment. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loweratomic" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Support/IRBuilder.h" + +using namespace llvm; + +namespace { + +bool LowerAtomicIntrinsic(CallInst *CI) { + IRBuilder<> Builder(CI->getParent(), CI); + + Function *Callee = CI->getCalledFunction(); + if (!Callee) + return false; + + unsigned IID = Callee->getIntrinsicID(); + switch (IID) { + case Intrinsic::memory_barrier: + break; + + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: { + Value *Ptr = CI->getArgOperand(0); + Value *Delta = CI->getArgOperand(1); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Res = NULL; + switch (IID) { + default: assert(0 && "Unrecognized atomic modify operation"); + case Intrinsic::atomic_load_add: + Res = Builder.CreateAdd(Orig, Delta); + break; + case Intrinsic::atomic_load_sub: + Res = Builder.CreateSub(Orig, Delta); + break; + case Intrinsic::atomic_load_and: + Res = Builder.CreateAnd(Orig, Delta); + break; + case Intrinsic::atomic_load_nand: + Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta)); + break; + case Intrinsic::atomic_load_or: + Res = Builder.CreateOr(Orig, Delta); + break; + case Intrinsic::atomic_load_xor: + Res = Builder.CreateXor(Orig, Delta); + break; + case 
Intrinsic::atomic_load_max: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Delta, + Orig); + break; + case Intrinsic::atomic_load_min: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Orig, + Delta); + break; + case Intrinsic::atomic_load_umax: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Delta, + Orig); + break; + case Intrinsic::atomic_load_umin: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Orig, + Delta); + break; + } + Builder.CreateStore(Res, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + case Intrinsic::atomic_swap: { + Value *Ptr = CI->getArgOperand(0); + Value *Val = CI->getArgOperand(1); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Builder.CreateStore(Val, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + case Intrinsic::atomic_cmp_swap: { + Value *Ptr = CI->getArgOperand(0); + Value *Cmp = CI->getArgOperand(1); + Value *Val = CI->getArgOperand(2); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); + Value *Res = Builder.CreateSelect(Equal, Val, Orig); + Builder.CreateStore(Res, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + default: + return false; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); + + return true; +} + +struct LowerAtomic : public BasicBlockPass { + static char ID; + LowerAtomic() : BasicBlockPass(ID) {} + bool runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { + Instruction *Inst = DI++; + if (CallInst *CI = dyn_cast<CallInst>(Inst)) + Changed |= LowerAtomicIntrinsic(CI); + } + return Changed; + } + +}; + +} + +char LowerAtomic::ID = 0; +INITIALIZE_PASS(LowerAtomic, "loweratomic", + "Lower atomic intrinsics to non-atomic form", + false, false); + +Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0e566c5..24fae42 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -304,7 +304,7 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - MemCpyOpt() : FunctionPass(&ID) {} + MemCpyOpt() : FunctionPass(ID) {} private: // This transformation requires dominator postdominator info @@ -331,8 +331,7 @@ namespace { // createMemCpyOptPass - The public interface to this file... FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } -static RegisterPass<MemCpyOpt> X("memcpyopt", - "MemCpy Optimization"); +INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false); @@ -374,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // If the call is readnone, ignore it, otherwise bail out. We don't even // allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). - if (AA.getModRefBehavior(CallSite::get(BI)) == + if (AA.getModRefBehavior(CallSite(BI)) == AliasAnalysis::DoesNotAccessMemory) continue; @@ -509,7 +508,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // because we'll need to do type comparisons based on the underlying type. 
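Stepping back to the new LowerAtomic file above: in the single-threaded, non-preemptible setting the pass targets, an atomic compare-and-swap collapses to a plain load/compare/select/store sequence, which is the shape the pass emits. A hedged C++ model of that semantics (an illustration of the lowering's meaning, not the pass's IR output):

#include <cassert>

// Non-atomic equivalent of atomic_cmp_swap(Ptr, Cmp, Val): store Val only
// if *Ptr == Cmp, and return the original value either way, mirroring how
// the lowered IR replaces all uses of the intrinsic with the loaded value.
static int CmpSwapLowered(int *Ptr, int Cmp, int Val) {
  int Orig = *Ptr;             // LoadInst
  bool Equal = (Orig == Cmp);  // ICmpEQ
  *Ptr = Equal ? Val : Orig;   // CreateSelect + StoreInst
  return Orig;
}

int main() {
  int X = 5;
  assert(CmpSwapLowered(&X, 5, 9) == 5 && X == 9); // hit: value swapped
  assert(CmpSwapLowered(&X, 5, 1) == 9 && X == 9); // miss: unchanged
  return 0;
}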
Value *cpyDest = cpy->getDest(); Value *cpySrc = cpy->getSource(); - CallSite CS = CallSite::get(C); + CallSite CS(C); // We need to be able to reason about the size of the memcpy, so we require // that it be a constant. @@ -637,10 +636,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { return true; } -/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which -/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be -/// a memcpy from X to Z (or potentially a memmove, depending on circumstances). -/// This allows later passes to remove the first memcpy altogether. +/// processMemCpy - perform simplification of memcpy's. If we have memcpy A +/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite +/// B to be a memcpy from X to Z (or potentially a memmove, depending on +/// circumstances). This allows later passes to remove the first memcpy +/// altogether. bool MemCpyOpt::processMemCpy(MemCpyInst *M) { MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); @@ -744,7 +744,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { const Type *ArgTys[3] = { M->getRawDest()->getType(), M->getRawSource()->getType(), M->getLength()->getType() }; - M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); + M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, + ArgTys, 3)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index 98452f5..b8afcc1 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -77,7 +77,7 @@ namespace { bool MadeChange; public: static char ID; // Pass identification, replacement for typeid - Reassociate() : FunctionPass(&ID) {} + Reassociate() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -103,7 +103,8 @@ namespace { } char Reassociate::ID = 0; -static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions"); +INITIALIZE_PASS(Reassociate, "reassociate", + "Reassociate expressions", false, false); // Public interface to the Reassociate pass FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp index 13222ac..506b72a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp @@ -36,7 +36,7 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); namespace { struct RegToMem : public FunctionPass { static char ID; // Pass identification, replacement for typeid - RegToMem() : FunctionPass(&ID) {} + RegToMem() : FunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(BreakCriticalEdgesID); @@ -59,8 +59,8 @@ namespace { } char RegToMem::ID = 0; -static RegisterPass<RegToMem> -X("reg2mem", "Demote all values to stack slots"); +INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots", + false, false); bool RegToMem::runOnFunction(Function &F) { @@ -124,7 +124,7 @@ bool RegToMem::runOnFunction(Function &F) { // createDemoteRegisterToMemory - Provide an entry point to create this pass. 
// -const PassInfo *const llvm::DemoteRegisterToMemoryID = &X; +char &llvm::DemoteRegisterToMemoryID = RegToMem::ID; FunctionPass *llvm::createDemoteRegisterToMemoryPass() { return new RegToMem(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index 907ece8..6115c05 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -275,12 +275,12 @@ public: return I->second; } - LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { + /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = StructValueState.find(std::make_pair(V, i)); assert(I != StructValueState.end() && "V is not in valuemap!"); return I->second; - } + }*/ /// getTrackedRetVals - Get the inferred return value map. /// @@ -508,17 +508,16 @@ private: void visitLoadInst (LoadInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); void visitCallInst (CallInst &I) { - visitCallSite(CallSite::get(&I)); + visitCallSite(&I); } void visitInvokeInst (InvokeInst &II) { - visitCallSite(CallSite::get(&II)); + visitCallSite(&II); visitTerminatorInst(II); } void visitCallSite (CallSite CS); void visitUnwindInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitAllocaInst (Instruction &I) { markOverdefined(&I); } - void visitVANextInst (Instruction &I) { markOverdefined(&I); } void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); } void visitInstruction(Instruction &I) { @@ -1586,7 +1585,7 @@ namespace { /// struct SCCP : public FunctionPass { static char ID; // Pass identification, replacement for typeid - SCCP() : FunctionPass(&ID) {} + SCCP() : FunctionPass(ID) {} // runOnFunction - Run the Sparse Conditional Constant Propagation // algorithm, and return true if the function was modified. @@ -1600,8 +1599,8 @@ namespace { } // end anonymous namespace char SCCP::ID = 0; -static RegisterPass<SCCP> -X("sccp", "Sparse Conditional Constant Propagation"); +INITIALIZE_PASS(SCCP, "sccp", + "Sparse Conditional Constant Propagation", false, false); // createSCCPPass - This is the public interface to this file. FunctionPass *llvm::createSCCPPass() { @@ -1702,14 +1701,15 @@ namespace { /// struct IPSCCP : public ModulePass { static char ID; - IPSCCP() : ModulePass(&ID) {} + IPSCCP() : ModulePass(ID) {} bool runOnModule(Module &M); }; } // end anonymous namespace char IPSCCP::ID = 0; -static RegisterPass<IPSCCP> -Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation"); +INITIALIZE_PASS(IPSCCP, "ipsccp", + "Interprocedural Sparse Conditional Constant Propagation", + false, false); // createIPSCCPPass - This is the public interface to this file. ModulePass *llvm::createIPSCCPPass() { @@ -1748,6 +1748,13 @@ static bool AddressIsTaken(const GlobalValue *GV) { bool IPSCCP::runOnModule(Module &M) { SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + // AddressTakenFunctions - This set keeps track of the address-taken functions + // that are in the input. As IPSCCP runs through and simplifies code, + // functions that were address taken can end up losing their + // address-taken-ness. Because of this, we keep track of their addresses from + // the first pass so we can use them for the later simplification pass. 
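Since the distinction drives this whole hunk, a concrete note on what "address taken" means here: only uses of a function other than as the callee of a direct call take its address. A tiny hypothetical C++ analogue:

void Callee() {}
void (*FnPtr)() = &Callee;  // this use takes Callee's address
void Caller() { Callee(); } // a direct call does not
int main() { Caller(); return FnPtr == &Callee ? 0 : 1; }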
+ SmallPtrSet<Function*, 32> AddressTakenFunctions; + // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. // @@ -1763,9 +1770,13 @@ bool IPSCCP::runOnModule(Module &M) { // If this function only has direct calls that we can see, we can track its // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. - if (F->hasLocalLinkage() && !AddressIsTaken(F)) { - Solver.AddArgumentTrackedFunction(F); - continue; + if (F->hasLocalLinkage()) { + if (AddressIsTaken(F)) + AddressTakenFunctions.insert(F); + else { + Solver.AddArgumentTrackedFunction(F); + continue; + } } // Assume the function is called. @@ -1950,7 +1961,7 @@ bool IPSCCP::runOnModule(Module &M) { continue; // We can only do this if we know that nothing else can call the function. - if (!F->hasLocalLinkage() || AddressIsTaken(F)) + if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F)) continue; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index dd445f6..fee317d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -28,6 +28,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Target/TargetData.h" @@ -51,7 +52,7 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { static char ID; // Pass identification, replacement for typeid - explicit SROA(signed T = -1) : FunctionPass(&ID) { + explicit SROA(signed T = -1) : FunctionPass(ID) { if (T == -1) SRThreshold = 128; else @@ -114,8 +115,7 @@ namespace { void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); void DeleteDeadInstructions(); - AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, @@ -135,7 +135,8 @@ namespace { } char SROA::ID = 0; -static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates"); +INITIALIZE_PASS(SROA, "scalarrepl", + "Scalar Replacement of Aggregates", false, false); // Public interface to the ScalarReplAggregates pass FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { @@ -193,6 +194,27 @@ private: }; } // end anonymous namespace. + +/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't +/// allowed to form. We do this to avoid MMX types, which is a complete hack, +/// but is required until the backend is fixed. +static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) { + StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple()); + if (!Triple.startswith("i386") && + !Triple.startswith("x86_64")) + return false; + + // Reject all the MMX vector types. 
+ switch (VTy->getNumElements()) { + default: return false; + case 1: return VTy->getElementType()->isIntegerTy(64); + case 2: return VTy->getElementType()->isIntegerTy(32); + case 4: return VTy->getElementType()->isIntegerTy(16); + case 8: return VTy->getElementType()->isIntegerTy(8); + } +} + + /// TryConvert - Analyze the specified alloca, and if it is safe to do so, /// rewrite it to be a new alloca which is mem2reg'able. This returns the new /// alloca if possible or null if not. @@ -209,7 +231,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. const Type *NewTy; - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + if (VectorTy && VectorTy->isVectorTy() && HadAVector && + !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) { DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. @@ -969,7 +992,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); if (Length) isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == CallInst::ArgOffset, Info); + UI.getOperandNo() == 0, Info); else MarkUnsafe(Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1662,6 +1685,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) + return HasPadding(ATy->getElementType(), TD); + + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return HasPadding(VTy->getElementType(), TD); + if (const StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned PrevFieldBitOffset = 0; @@ -1691,12 +1720,8 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { if (PrevFieldEnd < SL->getSizeInBits()) return true; } - - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - return HasPadding(ATy->getElementType(), TD); - } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - return HasPadding(VTy->getElementType(), TD); } + return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } @@ -1787,7 +1812,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (isOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != CallInst::ArgOffset) return false; + if (UI.getOperandNo() != 0) return false; // If the source of the memcpy/move is not a constant global, reject it. 
if (!PointsToConstantGlobal(MI->getSource())) diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 49d93a2..360749c 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -42,14 +42,15 @@ STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGSimplifyPass() : FunctionPass(&ID) {} + CFGSimplifyPass() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); }; } char CFGSimplifyPass::ID = 0; -static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG"); +INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg", + "Simplify the CFG", false, false); // Public interface to the CFGSimplification pass FunctionPass *llvm::createCFGSimplificationPass() { @@ -284,10 +285,9 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { while (LocalChange) { LocalChange = false; - // Loop over all of the basic blocks (except the first one) and remove them - // if they are unneeded... + // Loop over all of the basic blocks and remove them if they are unneeded... // - for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) { + for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { if (SimplifyCFG(BBIt++, TD)) { LocalChange = true; ++NumSimpl; diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index c3408e7..3ec70ec 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -32,7 +32,7 @@ namespace { const TargetData *TD; public: static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {} + SimplifyHalfPowrLibCalls() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -46,8 +46,8 @@ namespace { char SimplifyHalfPowrLibCalls::ID = 0; } // end anonymous namespace. -static RegisterPass<SimplifyHalfPowrLibCalls> -X("simplify-libcalls-halfpowr", "Simplify half_powr library calls"); +INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", + "Simplify half_powr library calls", false, false); // Public interface to the Simplify HalfPowr LibCalls pass. FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp index b1c6191..d7ce53f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -532,7 +532,7 @@ struct StrStrOpt : public LibCallOptimization { StrLen, B, TD); for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); UI != UE; ) { - ICmpInst *Old = cast<ICmpInst>(UI++); + ICmpInst *Old = cast<ICmpInst>(*UI++); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, ConstantInt::getNullValue(StrNCmp->getType()), "cmp"); @@ -566,8 +566,8 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). 
if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD), - CI->getType()); + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), + ToFindStr[0], B, TD), CI->getType()); return 0; } }; @@ -681,8 +681,8 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context), - false); + Value *Val = B.CreateIntCast(CI->getArgOperand(1), + Type::getInt8Ty(*Context), false); EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); return CI->getArgOperand(0); } @@ -1042,9 +1042,9 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte. - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()+1), 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the + ConstantInt::get(TD->getIntPtrType(*Context), // nul byte. + FormatStr.size() + 1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1080,7 +1080,8 @@ struct SPrintFOpt : public LibCallOptimization { Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), + IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1236,7 +1237,7 @@ namespace { bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {} + SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {} void InitOptimizations(); bool runOnFunction(Function &F); @@ -1253,8 +1254,8 @@ namespace { char SimplifyLibCalls::ID = 0; } // end anonymous namespace. -static RegisterPass<SimplifyLibCalls> -X("simplify-libcalls", "Simplify well-known library calls"); +INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false); // Public interface to the Simplify LibCalls pass. 
FunctionPass *llvm::createSimplifyLibCallsPass() { @@ -2155,7 +2156,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // * pow(pow(x,y),z)-> pow(x,y*z) // // puts: -// * puts("") -> putchar("\n") +// * puts("") -> putchar('\n') // // round, roundf, roundl: // * round(cnst) -> cnst' diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp index b88ba48..95d3ded 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp @@ -35,7 +35,7 @@ namespace { public: static char ID; // Pass identification - Sinking() : FunctionPass(&ID) {} + Sinking() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -56,8 +56,7 @@ namespace { } // end anonymous namespace char Sinking::ID = 0; -static RegisterPass<Sinking> -X("sink", "Code sinking"); +INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false); FunctionPass *llvm::createSinkingPass() { return new Sinking(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp index 9208238..2e437ac 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp @@ -49,7 +49,7 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - TailDup() : FunctionPass(&ID) {} + TailDup() : FunctionPass(ID) {} private: inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned); @@ -59,7 +59,7 @@ namespace { } char TailDup::ID = 0; -static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication"); +INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false); // Public interface to the Tail Duplication pass FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 01c8e5d..3717254 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -72,7 +72,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { static char ID; // Pass identification, replacement for typeid - TailCallElim() : FunctionPass(&ID) {} + TailCallElim() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -87,7 +87,8 @@ namespace { } char TailCallElim::ID = 0; -static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination"); +INITIALIZE_PASS(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false); // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { @@ -277,22 +278,22 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); Value *ReturnedValue = 0; - for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) - if (RI != IgnoreRI) { - Value *RetOp = RI->getOperand(0); - - // We can only perform this transformation if the value returned is - // evaluatable at the start of the initial invocation of the function, - // instead of at the end of the evaluation. 
- // - if (!isDynamicConstant(RetOp, CI, RI)) - return 0; - - if (ReturnedValue && RetOp != ReturnedValue) - return 0; // Cannot transform if differing values are returned. - ReturnedValue = RetOp; - } + for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { + ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()); + if (RI == 0 || RI == IgnoreRI) continue; + + // We can only perform this transformation if the value returned is + // evaluatable at the start of the initial invocation of the function, + // instead of at the end of the evaluation. + // + Value *RetOp = RI->getOperand(0); + if (!isDynamicConstant(RetOp, CI, RI)) + return 0; + + if (ReturnedValue && RetOp != ReturnedValue) + return 0; // Cannot transform if differing values are returned. + ReturnedValue = RetOp; + } return ReturnedValue; } @@ -306,7 +307,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, assert(I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"); - // Exactly one operand should be the result of the call instruction... + // Exactly one operand should be the result of the call instruction. if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || (I->getOperand(0) != CI && I->getOperand(1) != CI)) return 0; @@ -386,21 +387,22 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. - for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) - if (!CanMoveAboveCall(BBI, CI)) { - // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed - // using accumulator recursion elimination. Check to see if this is the - // case, and if so, remember the initial accumulator value for later. - if ((AccumulatorRecursionEliminationInitVal = - CanTransformAccumulatorRecursion(BBI, CI))) { - // Yes, this is accumulator recursion. Remember which instruction - // accumulates. - AccumulatorRecursionInstr = BBI; - } else { - return false; // Otherwise, we cannot eliminate the tail recursion! - } + for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) { + if (CanMoveAboveCall(BBI, CI)) continue; + + // If we can't move the instruction above the call, it might be because it + // is an associative and commutative operation that could be transformed + // using accumulator recursion elimination. Check to see if this is the + // case, and if so, remember the initial accumulator value for later. + if ((AccumulatorRecursionEliminationInitVal = + CanTransformAccumulatorRecursion(BBI, CI))) { + // Yes, this is accumulator recursion. Remember which instruction + // accumulates. + AccumulatorRecursionInstr = BBI; + } else { + return false; // Otherwise, we cannot eliminate the tail recursion! } + } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a
The return value indicates success or failure. bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { - pred_iterator PI(pred_begin(BB)), PE(pred_end(BB)); - // Can't merge the entry block. Don't merge away blocks who have their - // address taken: this is a bug if the predecessor block is the entry node - // (because we'd end up taking the address of the entry) and undesirable in - // any case. - if (pred_begin(BB) == pred_end(BB) || - BB->hasAddressTaken()) return false; + // Don't merge away blocks whose address is taken. + if (BB->hasAddressTaken()) return false; - BasicBlock *PredBB = *PI++; - for (; PI != PE; ++PI) // Search all predecessors, see if they are all same - if (*PI != PredBB) { - PredBB = 0; // There are multiple different predecessors... - break; - } - - // Can't merge if there are multiple predecessors. + // Can't merge if there are multiple predecessors, or no predecessors. + BasicBlock *PredBB = BB->getUniquePredecessor(); if (!PredBB) return false; + // Don't break self-loops. if (PredBB == BB) return false; // Don't break invokes. @@ -267,7 +257,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { case Instruction::Switch: // Should remove entry default: case Instruction::Ret: // Cannot happen, has no successors! - llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!"); + llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!"); } if (NewTI) // If it's a different instruction, replace. @@ -421,7 +411,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); - if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) + if (DominanceFrontier *DF = + P ? 
P->getAnalysisIfAvailable<DominanceFrontier>() : 0) DF->splitBlock(NewBB); // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI diff --git a/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp index f0e31ef..23a30cc5 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp @@ -82,8 +82,8 @@ void BasicInlinerImpl::inlineFunctions() { Function *F = *FI; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - CallSite CS = CallSite::get(I); - if (CS.getInstruction() && CS.getCalledFunction() + CallSite CS(cast<Value>(I)); + if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isDeclaration()) CallSites.push_back(CS); } diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 26f53c0..f75ffe6 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -36,7 +36,7 @@ STATISTIC(NumBroken, "Number of blocks inserted"); namespace { struct BreakCriticalEdges : public FunctionPass { static char ID; // Pass identification, replacement for typeid - BreakCriticalEdges() : FunctionPass(&ID) {} + BreakCriticalEdges() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -53,11 +53,11 @@ namespace { } char BreakCriticalEdges::ID = 0; -static RegisterPass<BreakCriticalEdges> -X("break-crit-edges", "Break critical edges in CFG"); +INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", + "Break critical edges in CFG", false, false); // Publicly exposed interface to pass... -const PassInfo *const llvm::BreakCriticalEdgesID = &X; +char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; FunctionPass *llvm::createBreakCriticalEdgesPass() { return new BreakCriticalEdges(); } @@ -225,7 +225,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); UI != E; ) { Value::use_iterator Use = UI++; - if (PHINode *PN = dyn_cast<PHINode>(Use)) { + if (PHINode *PN = dyn_cast<PHINode>(*Use)) { // Remove one entry from each PHI.
if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) PN->setOperand(Use.getOperandNo(), NewBB); diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 7a9d007..c313949 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -421,9 +421,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), - 1, false, B, TD); + if (isFoldable(3, 2, false)) { + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); replaceCall(CI->getArgOperand(0)); return true; } @@ -444,9 +444,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { - EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), - 1, false, B, TD); + if (isFoldable(3, 2, false)) { + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); replaceCall(CI->getArgOperand(0)); return true; } @@ -462,10 +462,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + if (isFoldable(3, 2, false)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), + false, B, TD); replaceCall(CI->getArgOperand(0)); return true; } @@ -487,7 +488,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
- if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) { + if (isFoldable(2, 1, true)) { Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, Name.substr(2, 6)); replaceCall(Ret); @@ -505,7 +506,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, TD, Name.substr(2, 7)); replaceCall(Ret); diff --git a/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt b/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt index dec227a..61cbeb2 100644 --- a/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -20,7 +20,6 @@ add_llvm_library(LLVMTransformUtils Mem2Reg.cpp PromoteMemoryToRegister.cpp SSAUpdater.cpp - SSI.cpp SimplifyCFG.cpp UnifyFunctionExitNodes.cpp ValueMapper.cpp diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 1dcfd57..f43186e 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -23,7 +23,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Support/CFG.h" -#include "ValueMapper.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/SmallVector.h" @@ -69,10 +69,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, } // Clone OldFunc into NewFunc, transforming the old arguments into references to -// ArgMap values. +// VMap values. // void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -126,7 +127,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, BE = NewFunc->end(); BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) - RemapInstruction(II, VMap); + RemapInstruction(II, VMap, ModuleLevelChanges); } /// CloneFunction - Return a copy of the specified function, but without @@ -139,6 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, /// Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, ClonedCodeInfo *CodeInfo) { std::vector<const Type*> ArgTypes; @@ -167,7 +169,7 @@ Function *llvm::CloneFunction(const Function *F, } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. 
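The cloning interfaces in this file now thread a ModuleLevelChanges flag through to the value mapper, so callers must state whether the clone crosses module boundaries. A usage sketch under the new signature (helper name hypothetical; CodeInfo left at its default):

#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Clone F within its own module: ModuleLevelChanges is false, so
// non-function-local metadata keeps its identity mapping.
static Function *cloneLocal(const Function *F) {
  ValueToValueMapTy VMap;
  return CloneFunction(F, VMap, /*ModuleLevelChanges=*/false);
}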
- CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo); + CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo); return NewF; } @@ -180,6 +182,7 @@ namespace { Function *NewFunc; const Function *OldFunc; ValueToValueMapTy &VMap; + bool ModuleLevelChanges; SmallVectorImpl<ReturnInst*> &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; @@ -187,12 +190,14 @@ namespace { public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, + bool moduleLevelChanges, SmallVectorImpl<ReturnInst*> &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) - : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns), - NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + : NewFunc(newFunc), OldFunc(oldFunc), + VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), + Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { } /// CloneBlock - The specified block is found to be reachable, clone it and @@ -313,7 +318,7 @@ ConstantFoldMappedInstruction(const Instruction *I) { SmallVector<Constant*, 8> Ops; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), - VMap))) + VMap, ModuleLevelChanges))) Ops.push_back(Op); else return 0; // All operands not constant! @@ -334,25 +339,16 @@ ConstantFoldMappedInstruction(const Instruction *I) { Ops.size(), TD); } -static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { - DILocation ILoc(InsnMD); - if (!ILoc.Verify()) return InsnMD; +static DebugLoc +UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL, + LLVMContext &Ctx) { + DebugLoc NewLoc = TheCallDL; + if (MDNode *IA = InsnDL.getInlinedAt(Ctx)) + NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL, + Ctx); - DILocation CallLoc(TheCallMD); - if (!CallLoc.Verify()) return InsnMD; - - DILocation OrigLocation = ILoc.getOrigLocation(); - MDNode *NewLoc = TheCallMD; - if (OrigLocation.Verify()) - NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD); - - Value *MDVs[] = { - InsnMD->getOperand(0), // Line - InsnMD->getOperand(1), // Col - InsnMD->getOperand(2), // Scope - NewLoc - }; - return MDNode::get(InsnMD->getContext(), MDVs, 4); + return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(), + InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx)); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -364,6 +360,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, @@ -377,8 +374,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(VMap.count(II) && "No mapping from source argument specified!"); #endif - PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns, - NameSuffix, CodeInfo, TD); + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, + Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. 
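The UpdateInlinedAtInfo rewrite above moves the inlined-at bookkeeping from raw "dbg" MDNodes onto the first-class DebugLoc type: an instruction keeps its own line, column, and scope, and gains the call's location as its inlined-at link, recursing when the instruction had already been inlined once. The composition step in isolation, as a sketch (helper name hypothetical):

#include "llvm/Instruction.h"
#include "llvm/Support/DebugLoc.h"
using namespace llvm;

// Compose the debug location for Inst after inlining it at Call.
static DebugLoc inlinedAt(const Instruction *Inst, const Instruction *Call) {
  LLVMContext &Ctx = Inst->getContext();
  const DebugLoc &IDL = Inst->getDebugLoc();
  const DebugLoc &CDL = Call->getDebugLoc();
  return DebugLoc::get(IDL.getLine(), IDL.getCol(), IDL.getScope(Ctx),
                       CDL.getAsMDNode(Ctx)); // inlined-at = the call site
}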
std::vector<const BasicBlock*> CloneWorklist; @@ -408,10 +405,9 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // BasicBlock::iterator I = NewBB->begin(); - unsigned DbgKind = OldFunc->getContext().getMDKindID("dbg"); - MDNode *TheCallMD = NULL; - if (TheCall && TheCall->hasMetadata()) - TheCallMD = TheCall->getMetadata(DbgKind); + DebugLoc TheCallDL; + if (TheCall) + TheCallDL = TheCall->getDebugLoc(); // Handle PHI nodes specially, as we have to remove references to dead // blocks. @@ -420,15 +416,17 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, BasicBlock::const_iterator OldI = BI->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) { if (I->hasMetadata()) { - if (TheCallMD) { - if (MDNode *IMD = I->getMetadata(DbgKind)) { - MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD); - I->setMetadata(DbgKind, NewMD); + if (!TheCallDL.isUnknown()) { + DebugLoc IDL = I->getDebugLoc(); + if (!IDL.isUnknown()) { + DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL, + I->getContext()); + I->setDebugLoc(NewDL); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. - I->setMetadata(DbgKind, 0); + I->setDebugLoc(DebugLoc()); } } PHIToResolve.push_back(cast<PHINode>(OldI)); @@ -444,18 +442,20 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // Otherwise, remap the rest of the instructions normally. for (; I != NewBB->end(); ++I) { if (I->hasMetadata()) { - if (TheCallMD) { - if (MDNode *IMD = I->getMetadata(DbgKind)) { - MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD); - I->setMetadata(DbgKind, NewMD); + if (!TheCallDL.isUnknown()) { + DebugLoc IDL = I->getDebugLoc(); + if (!IDL.isUnknown()) { + DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL, + I->getContext()); + I->setDebugLoc(NewDL); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. - I->setMetadata(DbgKind, 0); + I->setDebugLoc(DebugLoc()); } } - RemapInstruction(I, VMap); + RemapInstruction(I, VMap, ModuleLevelChanges); } } @@ -477,7 +477,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) { Value *InVal = MapValue(PN->getIncomingValue(pred), - VMap); + VMap, ModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index fc603d2..b347bf5 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -17,7 +17,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/TypeSymbolTable.h" #include "llvm/Constant.h" -#include "ValueMapper.h" +#include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; /// CloneModule - Return an exact copy of the specified module. 
This is not as @@ -89,7 +89,8 @@ Module *llvm::CloneModule(const Module *M, GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); if (I->hasInitializer()) GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), - VMap))); + VMap, + true))); GV->setLinkage(I->getLinkage()); GV->setThreadLocal(I->isThreadLocal()); GV->setConstant(I->isConstant()); @@ -108,7 +109,7 @@ Module *llvm::CloneModule(const Module *M, } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(F, I, VMap, Returns); + CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); } F->setLinkage(I->getLinkage()); @@ -120,34 +121,17 @@ Module *llvm::CloneModule(const Module *M, GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); GA->setLinkage(I->getLinkage()); if (const Constant* C = I->getAliasee()) - GA->setAliasee(cast<Constant>(MapValue(C, VMap))); + GA->setAliasee(cast<Constant>(MapValue(C, VMap, true))); } // And named metadata.... for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), E = M->named_metadata_end(); I != E; ++I) { const NamedMDNode &NMD = *I; - SmallVector<MDNode*, 4> MDs; + NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap))); - NamedMDNode::Create(New->getContext(), NMD.getName(), - MDs.data(), MDs.size(), New); + NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true))); } - // Update metadata attach with instructions. - for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI) - for (Function::iterator FI = MI->begin(), FE = MI->end(); - FI != FE; ++FI) - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs; - BI->getAllMetadata(MDs); - for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator - MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) { - Value *MappedValue = MapValue(MDI->second, VMap); - if (MDI->second != MappedValue && MappedValue) - BI->setMetadata(MDI->first, cast<MDNode>(MappedValue)); - } - } return New; } diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 598e7d2..88979e86 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -215,12 +215,12 @@ static void UpdateCallGraphAfterInlining(CallSite CS, if (I->second->getFunction() == 0) if (Function *F = CallSite(NewCall).getCalledFunction()) { // Indirect call site resolved to direct call. - CallerNode->addCalledFunction(CallSite::get(NewCall), CG[F]); - + CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); + continue; } - - CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); + + CallerNode->addCalledFunction(CallSite(NewCall), I->second); } // Update the call graph by deleting the edge from Callee to Caller. We must @@ -365,7 +365,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i", + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. 
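Stepping back to the CloneModule hunks above: named metadata is now copied by creating the named node in the destination module and remapping each operand with ModuleLevelChanges set, which is what makes the old per-instruction metadata fixup loop unnecessary. A self-contained sketch of that scheme (helper name hypothetical):

#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Copy all named metadata from Src into Dst, remapping operands so that
// MDNodes referencing cloned globals point into the new module.
static void cloneNamedMD(const Module &Src, Module &Dst,
                         ValueToValueMapTy &VMap) {
  for (Module::const_named_metadata_iterator I = Src.named_metadata_begin(),
       E = Src.named_metadata_end(); I != E; ++I) {
    NamedMDNode *NewNMD = Dst.getOrInsertNamedMetadata(I->getName());
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
      NewNMD->addOperand(cast<MDNode>(MapValue(I->getOperand(i), VMap,
                                               /*ModuleLevelChanges=*/true)));
  }
}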
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 090af95..5ca8299 100644 --- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -23,7 +23,7 @@ using namespace llvm; namespace { struct InstNamer : public FunctionPass { static char ID; // Pass identification, replacement for typeid - InstNamer() : FunctionPass(&ID) {} + InstNamer() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &Info) const { Info.setPreservesAll(); @@ -48,12 +48,12 @@ namespace { }; char InstNamer::ID = 0; - static RegisterPass<InstNamer> X("instnamer", - "Assign names to anonymous instructions"); + INITIALIZE_PASS(InstNamer, "instnamer", + "Assign names to anonymous instructions", false, false); } -const PassInfo *const llvm::InstructionNamerID = &X; +char &llvm::InstructionNamerID = InstNamer::ID; //===----------------------------------------------------------------------===// // // InstructionNamer - Give any unnamed non-void instructions "tmp" names. diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index e90c30b..275b265 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -47,7 +47,7 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables"); namespace { struct LCSSA : public LoopPass { static char ID; // Pass identification, replacement for typeid - LCSSA() : LoopPass(&ID) {} + LCSSA() : LoopPass(ID) {} // Cached analysis information for the current function. DominatorTree *DT; @@ -64,22 +64,13 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - // LCSSA doesn't actually require LoopSimplify, but the PassManager - // doesn't know how to schedule LoopSimplify by itself. - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredTransitive<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - AU.addRequiredTransitive<DominatorTree>(); - AU.addPreserved<ScalarEvolution>(); + AU.addRequired<DominatorTree>(); AU.addPreserved<DominatorTree>(); - - // Request DominanceFrontier now, even though LCSSA does - // not use it. This allows Pass Manager to schedule Dominance - // Frontier early enough such that one LPPassManager can handle - // multiple loop transformation passes. - AU.addRequired<DominanceFrontier>(); AU.addPreserved<DominanceFrontier>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<ScalarEvolution>(); } private: bool ProcessInstruction(Instruction *Inst, @@ -99,10 +90,10 @@ namespace { } char LCSSA::ID = 0; -static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass"); +INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false); Pass *llvm::createLCSSAPass() { return new LCSSA(); } -const PassInfo *const llvm::LCSSAID = &X; +char &llvm::LCSSAID = LCSSA::ID; /// BlockDominatesAnExit - Return true if the specified block dominates at least @@ -215,7 +206,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, DomTreeNode *DomNode = DT->getNode(DomBB); SSAUpdater SSAUpdate; - SSAUpdate.Initialize(Inst); + SSAUpdate.Initialize(Inst->getType(), Inst->getName()); // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. 
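The LCSSA hunk above shows the new SSAUpdater::Initialize signature: a type plus a name hint replaces the old prototype value. Minimal usage, assuming a value Inst with a use U outside its defining block (the helper name is hypothetical):

#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

// Rewrite one out-of-block use of Inst through the updater.
static void rewriteOneUse(Instruction *Inst, Use &U) {
  SSAUpdater SSA;
  SSA.Initialize(Inst->getType(), Inst->getName());
  SSA.AddAvailableValue(Inst->getParent(), Inst);
  SSA.RewriteUse(U); // inserts PHIs as needed and repoints U
}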
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 8e91138..52f0499 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -490,6 +490,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { /// rewriting all the predecessors to branch to the successor block and return /// true. If we can't transform, return false. bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { + assert(BB != &BB->getParent()->getEntryBlock() && + "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); + // We can't eliminate infinite loops. BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); if (BB == Succ) return false; diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 4f4edf3..b3c4801 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -46,9 +46,9 @@ #include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" @@ -65,27 +65,30 @@ STATISTIC(NumNested , "Number of nested loops split out"); namespace { struct LoopSimplify : public LoopPass { static char ID; // Pass identification, replacement for typeid - LoopSimplify() : LoopPass(&ID) {} + LoopSimplify() : LoopPass(ID) {} // AA - If we have an alias analysis object to update, this is it, otherwise // this is null. AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; + ScalarEvolution *SE; Loop *L; virtual bool runOnLoop(Loop *L, LPPassManager &LPM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { // We need loop information to identify the loops... - AU.addRequiredTransitive<LoopInfo>(); - AU.addRequiredTransitive<DominatorTree>(); + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + AU.addRequired<LoopInfo>(); AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + AU.addPreserved<DominanceFrontier>(); + AU.addPreservedID(LCSSAID); } /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. @@ -104,11 +107,11 @@ namespace { } char LoopSimplify::ID = 0; -static RegisterPass<LoopSimplify> -X("loopsimplify", "Canonicalize natural loops", true); +INITIALIZE_PASS(LoopSimplify, "loopsimplify", + "Canonicalize natural loops", true, false); // Publically exposed interface to pass... 
-const PassInfo *const llvm::LoopSimplifyID = &X; +char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// runOnLoop - Run down all loops in the CFG (recursively, but we could do @@ -120,6 +123,7 @@ bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); AA = getAnalysisIfAvailable<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); Changed |= ProcessLoop(L, LPM); @@ -141,15 +145,16 @@ ReprocessLoop: BB != E; ++BB) { if (*BB == L->getHeader()) continue; - SmallPtrSet<BasicBlock *, 4> BadPreds; - for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI){ + SmallPtrSet<BasicBlock*, 4> BadPreds; + for (pred_iterator PI = pred_begin(*BB), + PE = pred_end(*BB); PI != PE; ++PI) { BasicBlock *P = *PI; if (!L->contains(P)) BadPreds.insert(P); } // Delete each unique out-of-loop (and thus dead) predecessor. - for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(), + for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), E = BadPreds.end(); I != E; ++I) { DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "; @@ -530,6 +535,12 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + // If ScalarEvolution is around and knows anything about values in + // this loop, tell it to forget them, because we're about to + // substantially change it. + if (SE) + SE->forgetLoop(L); + BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], OuterLoopPreds.size(), @@ -619,6 +630,11 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; + + // Indirectbr edges cannot be split, so we must fail if we find one. + if (isa<IndirectBrInst>(P->getTerminator())) + return 0; + if (P != Preheader) BackedgeBlocks.push_back(P); } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index e0e07e7..236bbe9 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -127,6 +128,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) return false; } + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. 
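Both the LoopUnroll hunk continuing below and the SeparateNestedLoop hunk above adopt the same defensive pattern: before a transformation restructures a loop, any cached ScalarEvolution results for it are discarded, since trip counts and SCEVs are about to go stale. Sketched in isolation (helper name hypothetical):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Pass.h"
using namespace llvm;

// Drop SE's cached information about L before rewriting the loop.
static void invalidateLoopAnalyses(Loop *L, Pass &P) {
  if (ScalarEvolution *SE = P.getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);
}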
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) + SE->forgetLoop(L); + // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); // Find trip multiple if count is not available diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index 2696e69..a46dd84 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -78,14 +78,14 @@ namespace { static char ID; // Pass identification, replacement for typeid explicit LowerInvoke(const TargetLowering *tli = NULL, bool useExpensiveEHSupport = ExpensiveEHSupport) - : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport), + : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { // This is a cluster of orthogonal Transforms - AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreserved("mem2reg"); AU.addPreservedID(LowerSwitchID); } @@ -100,10 +100,11 @@ namespace { } char LowerInvoke::ID = 0; -static RegisterPass<LowerInvoke> -X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators"); +INITIALIZE_PASS(LowerInvoke, "lowerinvoke", + "Lower invoke and unwind, for unwindless code generators", + false, false); -const PassInfo *const llvm::LowerInvokePassID = &X; +char &llvm::LowerInvokePassID = LowerInvoke::ID; // Public Interface To the LowerInvoke pass. FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 468a5fe..5530b47 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -29,19 +29,18 @@ using namespace llvm; namespace { /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch - /// instructions. Note that this cannot be a BasicBlock pass because it - /// modifies the CFG! + /// instructions. class LowerSwitch : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - LowerSwitch() : FunctionPass(&ID) {} + LowerSwitch() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); - AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreserved("mem2reg"); AU.addPreservedID(LowerInvokePassID); } @@ -50,8 +49,7 @@ namespace { Constant* High; BasicBlock* BB; - CaseRange() : Low(0), High(0), BB(0) { } - CaseRange(Constant* low, Constant* high, BasicBlock* bb) : + CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) : Low(low), High(high), BB(bb) { } }; @@ -81,11 +79,11 @@ namespace { } char LowerSwitch::ID = 0; -static RegisterPass<LowerSwitch> -X("lowerswitch", "Lower SwitchInst's to branches"); +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false); // Publically exposed interface to pass... -const PassInfo *const llvm::LowerSwitchID = &X; +char &llvm::LowerSwitchID = LowerSwitch::ID; // createLowerSwitchPass - Interface to this file... 
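Worth noting in both LowerInvoke and LowerSwitch: with PromoteMemoryToRegisterID no longer exported (see the Mem2Reg hunk below), preservation of mem2reg is now declared by its registration string rather than by ID. A sketch of a pass using that form, and the new by-reference FunctionPass(ID) constructor (the pass name and behavior are hypothetical):

#include "llvm/Pass.h"
using namespace llvm;

namespace {
  struct KeepsMem2Reg : public FunctionPass {
    static char ID;
    KeepsMem2Reg() : FunctionPass(ID) {} // ID passed by reference now
    virtual bool runOnFunction(Function &) { return false; }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved("mem2reg"); // lookup by name: no exported ID needed
    }
  };
}
char KeepsMem2Reg::ID = 0;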
FunctionPass *llvm::createLowerSwitchPass() { return new LowerSwitch(); diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 99203b6..101645b 100644 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -27,7 +27,7 @@ STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { struct PromotePass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - PromotePass() : FunctionPass(&ID) {} + PromotePass() : FunctionPass(ID) {} // runOnFunction - To run this pass, first we calculate the alloca // instructions that are safe for promotion, then we promote each one. @@ -49,7 +49,8 @@ namespace { } // end of anonymous namespace char PromotePass::ID = 0; -static RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register"); +INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register", + false, false); bool PromotePass::runOnFunction(Function &F) { std::vector<AllocaInst*> Allocas; @@ -81,8 +82,6 @@ bool PromotePass::runOnFunction(Function &F) { return Changed; } -// Publically exposed interface to pass... -const PassInfo *const llvm::PromoteMemoryToRegisterID = &X; // createPromoteMemoryToRegister - Provide an entry point to create this pass. // FunctionPass *llvm::createPromoteMemoryToRegisterPass() { diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index c0de193..a4e3029 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -228,14 +228,6 @@ namespace { void run(); - /// properlyDominates - Return true if I1 properly dominates I2. - /// - bool properlyDominates(Instruction *I1, Instruction *I2) const { - if (InvokeInst *II = dyn_cast<InvokeInst>(I1)) - I1 = II->getNormalDest()->begin(); - return DT.properlyDominates(I1->getParent(), I2->getParent()); - } - /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. /// bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { @@ -896,11 +888,12 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DIVar, SI); // Propagate any debug metadata from the store onto the dbg.value. - if (MDNode *SIMD = SI->getMetadata("dbg")) - DbgVal->setMetadata("dbg", SIMD); + DebugLoc SIDL = SI->getDebugLoc(); + if (!SIDL.isUnknown()) + DbgVal->setDebugLoc(SIDL); // Otherwise propagate debug metadata from dbg.declare. - else if (MDNode *MD = DDI->getMetadata("dbg")) - DbgVal->setMetadata("dbg", MD); + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); } // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index f4bdb527..c855988 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -29,20 +29,21 @@ static AvailableValsTy &getAvailableVals(void *AV) { } SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {} + : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); } /// Initialize - Reset this object to get ready for a new set of SSA -/// updates. ProtoValue is the value used to name PHI nodes. -void SSAUpdater::Initialize(Value *ProtoValue) { +/// updates with type 'Ty'. 
PHI nodes get a name based on 'Name'. +void SSAUpdater::Initialize(const Type *Ty, StringRef Name) { if (AV == 0) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - PrototypeValue = ProtoValue; + ProtoType = Ty; + ProtoName = Name; } /// HasValueForBlock - Return true if the SSAUpdater already has a value for @@ -54,8 +55,8 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { /// AddAvailableValue - Indicate that a rewritten value is available in the /// specified block with the specified value. void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { - assert(PrototypeValue != 0 && "Need to initialize SSAUpdater"); - assert(PrototypeValue->getType() == V->getType() && + assert(ProtoType != 0 && "Need to initialize SSAUpdater"); + assert(ProtoType == V->getType() && "All rewritten values must have the same type"); getAvailableVals(AV)[BB] = V; } @@ -148,7 +149,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there are no predecessors, just return undef. if (PredValues.empty()) - return UndefValue::get(PrototypeValue->getType()); + return UndefValue::get(ProtoType); // Otherwise, if all the merged values are the same, just use it. if (SingularValue != 0) @@ -168,9 +169,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { } // Ok, we have no way out, insert a new one now. - PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), - PrototypeValue->getName(), - &BB->front()); + PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front()); InsertedPHI->reserveOperandSpace(PredValues.size()); // Fill in all the predecessors of the PHI. @@ -205,6 +204,22 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } +/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However, +/// this version of the method can rewrite uses in the same block as a +/// definition, because it assumes that all uses of a value are below any +/// inserted values. +void SSAUpdater::RewriteUseAfterInsertions(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueAtEndOfBlock(User->getParent()); + + U.set(V); +} + /// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator /// in the SSAUpdaterImpl template. namespace { @@ -266,15 +281,14 @@ public: /// GetUndefVal - Get an undefined value of the same type as the value /// being handled. static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { - return UndefValue::get(Updater->PrototypeValue->getType()); + return UndefValue::get(Updater->ProtoType); } /// CreateEmptyPHI - Create a new PHI instruction in the specified block. /// Reserve space for the operands but do not fill them in yet. 
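One more addition in this file deserves a pointer: RewriteUseAfterInsertions. It skips the mid-block value search that RewriteUse performs, and is only sound once all new definitions are in place and every remaining use sits below them. A sketch of the calling convention (helper name hypothetical):

#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

// Valid only after all AddAvailableValue calls: the rewritten value is
// the one live at the end of the use's (or PHI predecessor's) block.
static void rewriteLateUse(SSAUpdater &SSA, Use &U) {
  SSA.RewriteUseAfterInsertions(U);
}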
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, SSAUpdater *Updater) { - PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(), - Updater->PrototypeValue->getName(), + PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName, &BB->front()); PHI->reserveOperandSpace(NumPreds); return PHI; diff --git a/contrib/llvm/lib/Transforms/Utils/SSI.cpp b/contrib/llvm/lib/Transforms/Utils/SSI.cpp deleted file mode 100644 index 4e813dd..0000000 --- a/contrib/llvm/lib/Transforms/Utils/SSI.cpp +++ /dev/null @@ -1,432 +0,0 @@ -//===------------------- SSI.cpp - Creates SSI Representation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass converts a list of variables to the Static Single Information -// form. This is a program representation described by Scott Ananian in his -// Master Thesis: "The Static Single Information Form (1999)". -// We are building an on-demand representation, that is, we do not convert -// every single variable in the target function to SSI form. Rather, we receive -// a list of target variables that must be converted. We also do not -// completely convert a target variable to the SSI format. Instead, we only -// change the variable in the points where new information can be attached -// to its live range, that is, at branch points. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ssi" - -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/SSI.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" - -using namespace llvm; - -static const std::string SSI_PHI = "SSI_phi"; -static const std::string SSI_SIG = "SSI_sigma"; - -STATISTIC(NumSigmaInserted, "Number of sigma functions inserted"); -STATISTIC(NumPhiInserted, "Number of phi functions inserted"); - -void SSI::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredTransitive<DominanceFrontier>(); - AU.addRequiredTransitive<DominatorTree>(); - AU.setPreservesAll(); -} - -bool SSI::runOnFunction(Function &F) { - DT_ = &getAnalysis<DominatorTree>(); - return false; -} - -/// This methods creates the SSI representation for the list of values -/// received. It will only create SSI representation if a value is used -/// to decide a branch. Repeated values are created only once. 
-/// -void SSI::createSSI(SmallVectorImpl<Instruction *> &value) { - init(value); - - SmallPtrSet<Instruction*, 4> needConstruction; - for (SmallVectorImpl<Instruction*>::iterator I = value.begin(), - E = value.end(); I != E; ++I) - if (created.insert(*I)) - needConstruction.insert(*I); - - insertSigmaFunctions(needConstruction); - - // Test if there is a need to transform to SSI - if (!needConstruction.empty()) { - insertPhiFunctions(needConstruction); - renameInit(needConstruction); - rename(DT_->getRoot()); - fixPhis(); - } - - clean(); -} - -/// Insert sigma functions (a sigma function is a phi function with one -/// operator) -/// -void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) { - for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), - E = value.end(); I != E; ++I) { - for (Value::use_iterator begin = (*I)->use_begin(), - end = (*I)->use_end(); begin != end; ++begin) { - // Test if the Use of the Value is in a comparator - if (CmpInst *CI = dyn_cast<CmpInst>(begin)) { - // Iterates through all uses of CmpInst - for (Value::use_iterator begin_ci = CI->use_begin(), - end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) { - // Test if any use of CmpInst is in a Terminator - if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) { - insertSigma(TI, *I); - } - } - } - } - } -} - -/// Inserts Sigma Functions in every BasicBlock successor to Terminator -/// Instruction TI. All inserted Sigma Function are related to Instruction I. -/// -void SSI::insertSigma(TerminatorInst *TI, Instruction *I) { - // Basic Block of the Terminator Instruction - BasicBlock *BB = TI->getParent(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { - // Next Basic Block - BasicBlock *BB_next = TI->getSuccessor(i); - if (BB_next != BB && - BB_next->getSinglePredecessor() != NULL && - dominateAny(BB_next, I)) { - PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin()); - PN->addIncoming(I, BB); - sigmas[PN] = I; - created.insert(PN); - defsites[I].push_back(BB_next); - ++NumSigmaInserted; - } - } -} - -/// Insert phi functions when necessary -/// -void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) { - DominanceFrontier *DF = &getAnalysis<DominanceFrontier>(); - for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), - E = value.end(); I != E; ++I) { - // Test if there were any sigmas for this variable - SmallPtrSet<BasicBlock *, 16> BB_visited; - - // Insert phi functions if there is any sigma function - while (!defsites[*I].empty()) { - - BasicBlock *BB = defsites[*I].back(); - - defsites[*I].pop_back(); - DominanceFrontier::iterator DF_BB = DF->find(BB); - - // The BB is unreachable. Skip it. 
- if (DF_BB == DF->end()) - continue; - - // Iterates through all the dominance frontier of BB - for (std::set<BasicBlock *>::iterator DF_BB_begin = - DF_BB->second.begin(), DF_BB_end = DF_BB->second.end(); - DF_BB_begin != DF_BB_end; ++DF_BB_begin) { - BasicBlock *BB_dominated = *DF_BB_begin; - - // Test if has not yet visited this node and if the - // original definition dominates this node - if (BB_visited.insert(BB_dominated) && - DT_->properlyDominates(value_original[*I], BB_dominated) && - dominateAny(BB_dominated, *I)) { - PHINode *PN = PHINode::Create( - (*I)->getType(), SSI_PHI, BB_dominated->begin()); - phis.insert(std::make_pair(PN, *I)); - created.insert(PN); - - defsites[*I].push_back(BB_dominated); - ++NumPhiInserted; - } - } - } - BB_visited.clear(); - } -} - -/// Some initialization for the rename part -/// -void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) { - for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), - E = value.end(); I != E; ++I) - value_stack[*I].push_back(*I); -} - -/// Renames all variables in the specified BasicBlock. -/// Only variables that need to be rename will be. -/// -void SSI::rename(BasicBlock *BB) { - SmallPtrSet<Instruction*, 8> defined; - - // Iterate through instructions and make appropriate renaming. - // For SSI_PHI (b = PHI()), store b at value_stack as a new - // definition of the variable it represents. - // For SSI_SIG (b = PHI(a)), substitute a with the current - // value of a, present in the value_stack. - // Then store bin the value_stack as the new definition of a. - // For all other instructions (b = OP(a, c, d, ...)), we need to substitute - // all operands with its current value, present in value_stack. - for (BasicBlock::iterator begin = BB->begin(), end = BB->end(); - begin != end; ++begin) { - Instruction *I = begin; - if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions - Instruction* position; - - // Treat SSI_PHI - if ((position = getPositionPhi(PN))) { - value_stack[position].push_back(PN); - defined.insert(position); - // Treat SSI_SIG - } else if ((position = getPositionSigma(PN))) { - substituteUse(I); - value_stack[position].push_back(PN); - defined.insert(position); - } - - // Treat all other PHI functions - else { - substituteUse(I); - } - } - - // Treat all other functions - else { - substituteUse(I); - } - } - - // This loop iterates in all BasicBlocks that are successors of the current - // BasicBlock. For each SSI_PHI instruction found, insert an operand. - // This operand is the current operand in value_stack for the variable - // in "position". And the BasicBlock this operand represents is the current - // BasicBlock. - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { - BasicBlock *BB_succ = *SI; - - for (BasicBlock::iterator begin = BB_succ->begin(), - notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) { - Instruction *I = begin; - PHINode *PN = dyn_cast<PHINode>(I); - Instruction* position; - if (PN && ((position = getPositionPhi(PN)))) { - PN->addIncoming(value_stack[position].back(), BB); - } - } - } - - // This loop calls rename on all children from this block. This time children - // refers to a successor block in the dominance tree. 
- DomTreeNode *DTN = DT_->getNode(BB); - for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end(); - begin != end; ++begin) { - DomTreeNodeBase<BasicBlock> *DTN_children = *begin; - BasicBlock *BB_children = DTN_children->getBlock(); - rename(BB_children); - } - - // Now we remove all inserted definitions of a variable from the top of - // the stack leaving the previous one as the top. - for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(), - DE = defined.end(); DI != DE; ++DI) - value_stack[*DI].pop_back(); -} - -/// Substitute any use in this instruction for the last definition of -/// the variable -/// -void SSI::substituteUse(Instruction *I) { - for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { - Value *operand = I->getOperand(i); - for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator - VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) { - if (operand == VI->second.front() && - I != VI->second.back()) { - PHINode *PN_I = dyn_cast<PHINode>(I); - PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back()); - - // If a phi created in a BasicBlock is used as an operand of another - // created in the same BasicBlock, this step marks this second phi, - // to fix this issue later. It cannot be fixed now, because the - // operands of the first phi are not final yet. - if (PN_I && PN_vs && - VI->second.back()->getParent() == I->getParent()) { - - phisToFix.insert(PN_I); - } - - I->setOperand(i, VI->second.back()); - break; - } - } - } -} - -/// Test if the BasicBlock BB dominates any use or definition of value. -/// If it dominates a phi instruction that is on the same BasicBlock, -/// that does not count. -/// -bool SSI::dominateAny(BasicBlock *BB, Instruction *value) { - for (Value::use_iterator begin = value->use_begin(), - end = value->use_end(); begin != end; ++begin) { - Instruction *I = cast<Instruction>(*begin); - BasicBlock *BB_father = I->getParent(); - if (BB == BB_father && isa<PHINode>(I)) - continue; - if (DT_->dominates(BB, BB_father)) { - return true; - } - } - return false; -} - -/// When there is a phi node that is created in a BasicBlock and it is used -/// as an operand of another phi function used in the same BasicBlock, -/// LLVM looks this as an error. So on the second phi, the first phi is called -/// P and the BasicBlock it incomes is B. This P will be replaced by the value -/// it has for BasicBlock B. It also includes undef values for predecessors -/// that were not included in the phi. 
-/// -void SSI::fixPhis() { - for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(), - end = phisToFix.end(); begin != end; ++begin) { - PHINode *PN = *begin; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) { - PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i)); - if (PN_father && PN->getParent() == PN_father->getParent() && - !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) { - BasicBlock *BB = PN->getIncomingBlock(i); - int pos = PN_father->getBasicBlockIndex(BB); - PN->setIncomingValue(i, PN_father->getIncomingValue(pos)); - } - } - } - - for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(), - end = phis.end(); begin != end; ++begin) { - PHINode *PN = begin->first; - BasicBlock *BB = PN->getParent(); - pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - SmallVector<BasicBlock*, 8> Preds(PI, PE); - for (unsigned size = Preds.size(); - PI != PE && PN->getNumIncomingValues() != size; ++PI) { - bool found = false; - for (unsigned i = 0, pn_end = PN->getNumIncomingValues(); - i < pn_end; ++i) { - if (PN->getIncomingBlock(i) == *PI) { - found = true; - break; - } - } - if (!found) { - PN->addIncoming(UndefValue::get(PN->getType()), *PI); - } - } - } -} - -/// Return which variable (position on the vector of variables) this phi -/// represents on the phis list. -/// -Instruction* SSI::getPositionPhi(PHINode *PN) { - DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN); - if (val == phis.end()) - return 0; - else - return val->second; -} - -/// Return which variable (position on the vector of variables) this phi -/// represents on the sigmas list. -/// -Instruction* SSI::getPositionSigma(PHINode *PN) { - DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN); - if (val == sigmas.end()) - return 0; - else - return val->second; -} - -/// Initializes -/// -void SSI::init(SmallVectorImpl<Instruction *> &value) { - for (SmallVectorImpl<Instruction *>::iterator I = value.begin(), - E = value.end(); I != E; ++I) { - value_original[*I] = (*I)->getParent(); - defsites[*I].push_back((*I)->getParent()); - } -} - -/// Clean all used resources in this creation of SSI -/// -void SSI::clean() { - phis.clear(); - sigmas.clear(); - phisToFix.clear(); - - defsites.clear(); - value_stack.clear(); - value_original.clear(); -} - -/// createSSIPass - The public interface to this file... -/// -FunctionPass *llvm::createSSIPass() { return new SSI(); } - -char SSI::ID = 0; -static RegisterPass<SSI> X("ssi", "Static Single Information Construction"); - -/// SSIEverything - A pass that runs createSSI on every non-void variable, -/// intended for debugging. -namespace { - struct SSIEverything : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - SSIEverything() : FunctionPass(&ID) {} - - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<SSI>(); - } - }; -} - -bool SSIEverything::runOnFunction(Function &F) { - SmallVector<Instruction *, 16> Insts; - SSI &ssi = getAnalysis<SSI>(); - - if (F.isDeclaration() || F.isIntrinsic()) return false; - - for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) - for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) - if (!I->getType()->isVoidTy()) - Insts.push_back(I); - - ssi.createSSI(Insts); - return true; -} - -/// createSSIEverythingPass - The public interface to this file... 
-/// -FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); } - -char SSIEverything::ID = 0; -static RegisterPass<SSIEverything> -Y("ssi-everything", "Static Single Information Construction"); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 27b07d9..28d7afb 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -949,7 +949,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { UI != E; ++UI) { // Ignore any user that is not a PHI node in BB2. These can only occur in // unreachable blocks, because they would not be dominated by the instr. - PHINode *PN = dyn_cast<PHINode>(UI); + PHINode *PN = dyn_cast<PHINode>(*UI); if (!PN || PN->getParent() != BB2) return false; PHIUses.push_back(PN); @@ -1724,12 +1724,12 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { assert(BB && BB->getParent() && "Block not embedded in function!"); assert(BB->getTerminator() && "Degenerate basic block encountered!"); - assert(&BB->getParent()->getEntryBlock() != BB && - "Can't Simplify entry block!"); - // Remove basic blocks that have no predecessors... or that just have themself - // as a predecessor. These are unreachable. - if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) { + // Remove basic blocks that have no predecessors (except the entry block)... + // or that just have themself as a predecessor. These are unreachable. + if ((pred_begin(BB) == pred_end(BB) && + &BB->getParent()->getEntryBlock() != BB) || + BB->getSinglePredecessor() == BB) { DEBUG(dbgs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB); return true; @@ -1880,8 +1880,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { while (isa<DbgInfoIntrinsic>(BBI)) ++BBI; if (BBI->isTerminator()) // Terminator is the only non-phi instruction! - if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) - return true; + if (BB != &BB->getParent()->getEntryBlock()) + if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) + return true; } else { // Conditional branch if (isValueEqualityComparison(BI)) { @@ -2049,12 +2050,38 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { } // If this block is now dead, remove it. - if (pred_begin(BB) == pred_end(BB)) { + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) { // We know there are no successors, so just nuke the block. M->getBasicBlockList().erase(BB); return true; } } + } else if (IndirectBrInst *IBI = + dyn_cast<IndirectBrInst>(BB->getTerminator())) { + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + BasicBlock *Dest = IBI->getDestination(i); + if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) { + Dest->removePredecessor(BB); + IBI->removeDestination(i); + --i; --e; + Changed = true; + } + } + + if (IBI->getNumDestinations() == 0) { + // If the indirectbr has no successors, change it to unreachable. + new UnreachableInst(IBI->getContext(), IBI); + IBI->eraseFromParent(); + Changed = true; + } else if (IBI->getNumDestinations() == 1) { + // If the indirectbr has one successor, change it to a direct branch. 
+ BranchInst::Create(IBI->getDestination(0), IBI); + IBI->eraseFromParent(); + Changed = true; + } } // Merge basic blocks into their predecessor if there is only one distinct @@ -2068,12 +2095,15 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // is a conditional branch, see if we can hoist any code from this block up // into our predecessor. pred_iterator PI(pred_begin(BB)), PE(pred_end(BB)); - BasicBlock *OnlyPred = *PI++; - for (; PI != PE; ++PI) // Search all predecessors, see if they are all same - if (*PI != OnlyPred) { + BasicBlock *OnlyPred = 0; + for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same + if (!OnlyPred) + OnlyPred = *PI; + else if (*PI != OnlyPred) { OnlyPred = 0; // There are multiple different predecessors... break; } + } if (OnlyPred) if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator())) @@ -2172,8 +2202,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// eliminates unreachable basic blocks, and does other "peephole" optimization /// of the CFG. It returns true if a modification was made. /// -/// WARNING: The entry node of a function may not be simplified. -/// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) { return SimplifyCFGOpt(TD).run(BB); } diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 3fa8b70..a51f1e1 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -24,8 +24,8 @@ using namespace llvm; char UnifyFunctionExitNodes::ID = 0; -static RegisterPass<UnifyFunctionExitNodes> -X("mergereturn", "Unify function exit nodes"); +INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn", + "Unify function exit nodes", false, false); Pass *llvm::createUnifyFunctionExitNodesPass() { return new UnifyFunctionExitNodes(); @@ -35,7 +35,7 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); // This is a cluster of orthogonal Transforms - AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreserved("mem2reg"); AU.addPreservedID(LowerSwitchID); } diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 3f6a90c..fc4bde7 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "ValueMapper.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -20,28 +20,51 @@ #include "llvm/ADT/SmallVector.h" using namespace llvm; -Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { +Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, + bool ModuleLevelChanges) { Value *&VMSlot = VM[V]; if (VMSlot) return VMSlot; // Does it exist in the map yet? // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the // DenseMap. This includes any recursive calls to MapValue. - // Global values and non-function-local metadata do not need to be seeded into - // the VM if they are using the identity mapping. + // Global values do not need to be seeded into the VM if they + // are using the identity mapping. 
if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) || - (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal())) + (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() && + !ModuleLevelChanges)) return VMSlot = const_cast<Value*>(V); if (const MDNode *MD = dyn_cast<MDNode>(V)) { - SmallVector<Value*, 4> Elts; - for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) - Elts.push_back(MD->getOperand(i) ? MapValue(MD->getOperand(i), VM) : 0); - return VM[V] = MDNode::get(V->getContext(), Elts.data(), Elts.size()); + // Start by assuming that we'll use the identity mapping. + VMSlot = const_cast<Value*>(V); + + // Check all operands to see if any need to be remapped. + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { + Value *OP = MD->getOperand(i); + if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue; + + // Ok, at least one operand needs remapping. + MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0); + VM[V] = Dummy; + SmallVector<Value*, 4> Elts; + Elts.reserve(MD->getNumOperands()); + for (i = 0; i != e; ++i) + Elts.push_back(MD->getOperand(i) ? + MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0); + MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size()); + Dummy->replaceAllUsesWith(NewMD); + MDNode::deleteTemporary(Dummy); + return VM[V] = NewMD; + } + + // No operands needed remapping; keep the identity map. + return const_cast<Value*>(V); } Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); - if (C == 0) return 0; + if (C == 0) + return 0; if (isa<ConstantInt>(C) || isa<ConstantFP>(C) || isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) || @@ -51,7 +74,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, ModuleLevelChanges); if (MV != *i) { // This array must contain a reference to a global, make a new array // and return it. @@ -62,7 +85,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, + ModuleLevelChanges))); return VM[V] = ConstantArray::get(CA->getType(), Values); } } @@ -72,7 +96,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, ModuleLevelChanges); if (MV != *i) { // This struct must contain a reference to a global, make a new struct // and return it. 
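The MDNode branch of MapValue above is the subtle part of this file: metadata graphs can be cyclic, so the node being remapped is temporarily represented by a placeholder while its operands are mapped. The trick in isolation, as a sketch with the recursive operand mapping elided (helper name hypothetical):

#include "llvm/ADT/SmallVector.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
using namespace llvm;

static MDNode *remapWithPlaceholder(MDNode *MD, LLVMContext &Ctx) {
  // Stand in for MD while its operands are mapped, so a cyclic reference
  // back to MD resolves to something instead of recursing forever.
  MDNode *Dummy = MDNode::getTemporary(Ctx, 0, 0);
  SmallVector<Value*, 4> Elts;
  for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
    Elts.push_back(MD->getOperand(i)); // real code maps each operand here
  MDNode *NewMD = MDNode::get(Ctx, Elts.data(), Elts.size());
  Dummy->replaceAllUsesWith(NewMD);    // patch cycles through the dummy
  MDNode::deleteTemporary(Dummy);
  return NewMD;
}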
@@ -83,7 +107,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, + ModuleLevelChanges))); return VM[V] = ConstantStruct::get(CS->getType(), Values); } } @@ -93,14 +118,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { std::vector<Constant*> Ops; for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) - Ops.push_back(cast<Constant>(MapValue(*i, VM))); + Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges))); return VM[V] = CE->getWithOperands(Ops); } if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) { for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, ModuleLevelChanges); if (MV != *i) { // This vector value must contain a reference to a global, make a new // vector constant and return it. @@ -111,7 +136,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, + ModuleLevelChanges))); return VM[V] = ConstantVector::get(Values); } } @@ -119,19 +145,33 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { } BlockAddress *BA = cast<BlockAddress>(C); - Function *F = cast<Function>(MapValue(BA->getFunction(), VM)); - BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM)); + Function *F = cast<Function>(MapValue(BA->getFunction(), VM, + ModuleLevelChanges)); + BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM, + ModuleLevelChanges)); return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); } /// RemapInstruction - Convert the instruction operands from referencing the /// current values into those specified by VMap. /// -void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) { +void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, + bool ModuleLevelChanges) { + // Remap operands. for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, VMap); + Value *V = MapValue(*op, VMap, ModuleLevelChanges); assert(V && "Referenced value not in value map!"); *op = V; } -} + // Remap attached metadata. + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + I->getAllMetadata(MDs); + for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator + MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { + Value *Old = MI->second; + Value *New = MapValue(Old, VMap, ModuleLevelChanges); + if (New != Old) + I->setMetadata(MI->first, cast<MDNode>(New)); + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.h b/contrib/llvm/lib/Transforms/Utils/ValueMapper.h deleted file mode 100644 index f4ff643..0000000 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.h +++ /dev/null @@ -1,29 +0,0 @@ -//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the MapValue interface which is used by various parts of -// the Transforms/Utils library to implement cloning and linking facilities. -// -//===----------------------------------------------------------------------===// - -#ifndef VALUEMAPPER_H -#define VALUEMAPPER_H - -#include "llvm/ADT/ValueMap.h" - -namespace llvm { - class Value; - class Instruction; - typedef ValueMap<const Value *, Value *> ValueToValueMapTy; - - Value *MapValue(const Value *V, ValueToValueMapTy &VM); - void RemapInstruction(Instruction *I, ValueToValueMapTy &VM); -} // End llvm namespace - -#endif
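For downstream users of the now-public header, the practical upshot: RemapInstruction rewrites attached metadata as well as operands, and both entry points take the ModuleLevelChanges flag. A closing usage sketch (helper name hypothetical):

#include "llvm/Instruction.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Remap the operands and attached metadata of a cloned instruction. Pass
// true when cloning across modules, false for intra-module work such as
// ordinary inlining.
static void remapCloned(Instruction *I, ValueToValueMapTy &VMap) {
  RemapInstruction(I, VMap, /*ModuleLevelChanges=*/true);
}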