Diffstat (limited to 'contrib/llvm/lib/Transforms')
102 files changed, 12551 insertions, 8353 deletions
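Most of the mechanical churn across these 102 files is the same pass-registration rework: each pass constructor now calls initialize<Name>Pass(*PassRegistry::getPassRegistry()), the stray semicolon after INITIALIZE_PASS(...) is dropped, and passes with analysis dependencies switch to the INITIALIZE_PASS_BEGIN / INITIALIZE_AG_DEPENDENCY / INITIALIZE_PASS_END form. Below is a minimal sketch of the idiom; the pass name ExamplePass and its boilerplate are hypothetical, while the macro names and registry calls are taken verbatim from the hunks that follow.

    #include "llvm/Module.h"
    #include "llvm/Pass.h"
    #include "llvm/PassRegistry.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    // Normally declared in InitializePasses.h and defined by the
    // INITIALIZE_PASS_* macros below; forward-declared here so the
    // constructor can call it (hypothetical pass).
    namespace llvm { void initializeExamplePassPass(PassRegistry &); }

    namespace {
      struct ExamplePass : public ModulePass {
        static char ID; // Pass identification, replacement for typeid
        ExamplePass() : ModulePass(ID) {
          // New idiom in this patch set: a pass self-registers on
          // construction, so it is initialized even when created directly
          // rather than looked up by name.
          initializeExamplePassPass(*PassRegistry::getPassRegistry());
        }
        virtual bool runOnModule(Module &) { return false; }
      };
    }

    char ExamplePass::ID = 0;
    // Analysis-group dependencies are now spelled out between BEGIN and END
    // (note: no trailing semicolon, another change applied throughout):
    INITIALIZE_PASS_BEGIN(ExamplePass, "example", "Example pass", false, false)
    INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
    INITIALIZE_PASS_END(ExamplePass, "example", "Example pass", false, false)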
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 0c77e1f..0c650cf 100644 --- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -39,7 +39,6 @@ #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Target/TargetData.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -67,7 +66,9 @@ namespace { virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) {} + : CallGraphSCCPass(ID), maxElements(maxElements) { + initializeArgPromotionPass(*PassRegistry::getPassRegistry()); + } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; @@ -84,8 +85,12 @@ namespace { } char ArgPromotion::ID = 0; -INITIALIZE_PASS(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, false); +INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false) Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); @@ -130,47 +135,74 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { if (PointerArgs.empty()) return 0; // Second check: make sure that all callers are direct callers. We can't - // transform functions that have indirect callers. - if (F->hasAddressTaken()) - return 0; - + // transform functions that have indirect callers. Also see if the function + // is self-recursive. + bool isSelfRecursive = false; + for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); + UI != E; ++UI) { + CallSite CS(*UI); + // Must be a direct call. + if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0; + + if (CS.getInstruction()->getParent()->getParent() == F) + isSelfRecursive = true; + } + // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; SmallPtrSet<Argument*, 8> ByValArgsToTransform; for (unsigned i = 0; i != PointerArgs.size(); ++i) { bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal); + Argument *PtrArg = PointerArgs[i].first; + const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe. - Argument *PtrArg = PointerArgs[i].first; if (isByVal) { - const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); if (const StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" << " than " << maxElements << " arguments to the function.\n"); - } else { - // If all the elements are single-value types, we can promote it. 
- bool AllSimple = true; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - if (!STy->getElementType(i)->isSingleValueType()) { - AllSimple = false; - break; - } - - // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow scalarrepl to hack on - // the new alloca we introduce. - if (AllSimple) { - ByValArgsToTransform.insert(PtrArg); - continue; + continue; + } + + // If all the elements are single-value types, we can promote it. + bool AllSimple = true; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + if (!STy->getElementType(i)->isSingleValueType()) { + AllSimple = false; + break; } } + + // Safe to transform, don't even bother trying to "promote" it. + // Passing the elements as a scalar will allow scalarrepl to hack on + // the new alloca we introduce. + if (AllSimple) { + ByValArgsToTransform.insert(PtrArg); + continue; + } } } + // If the argument is a recursive type and we're in a recursive + // function, we could end up infinitely peeling the function argument. + if (isSelfRecursive) { + if (const StructType *STy = dyn_cast<StructType>(AgTy)) { + bool RecursiveType = false; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + if (STy->getElementType(i) == PtrArg->getType()) { + RecursiveType = true; + break; + } + } + if (RecursiveType) + continue; + } + } + // Otherwise, see if we can promote the pointer to its value. if (isSafeToPromoteArgument(PtrArg, isByVal)) ArgsToPromote.insert(PtrArg); @@ -183,22 +215,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); } -/// IsAlwaysValidPointer - Return true if the specified pointer is always legal -/// to load. -static bool IsAlwaysValidPointer(Value *V) { - if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true; - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) - return IsAlwaysValidPointer(GEP->getOperand(0)); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::GetElementPtr) - return IsAlwaysValidPointer(CE->getOperand(0)); - - return false; -} - -/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that +/// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { Function *Callee = Arg->getParent(); unsigned ArgNo = std::distance(Callee->arg_begin(), @@ -211,7 +230,7 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) { CallSite CS(*UI); assert(CS && "Should only have direct calls!"); - if (!IsAlwaysValidPointer(CS.getArgument(ArgNo))) + if (!CS.getArgument(ArgNo)->isDereferenceablePointer()) return false; } return true; @@ -318,7 +337,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. 
- if (isByVal || AllCalleesPassInValidPointerForArgument(Arg)) + if (isByVal || AllCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -434,8 +453,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { SmallPtrSet<BasicBlock*, 16> TranspBlocks; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - TargetData *TD = getAnalysisIfAvailable<TargetData>(); - if (!TD) return false; // Without TargetData, assume the worst. for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to @@ -443,11 +460,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); - const PointerType *LoadTy = - cast<PointerType>(Load->getPointerOperand()->getType()); - unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType()); - - if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) + AliasAnalysis::Location Loc = AA.getLocation(Load); + if (AA.canInstructionRangeModify(BB->front(), *Load, Loc)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. @@ -458,7 +472,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > I = idf_ext_begin(P, TranspBlocks), E = idf_ext_end(P, TranspBlocks); I != E; ++I) - if (AA.canBasicBlockModify(**I, Arg, LoadSize)) + if (AA.canBasicBlockModify(**I, Loc)) return false; } } @@ -694,6 +708,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); + // Transfer the TBAA info too. + newLoad->setMetadata(LLVMContext::MD_tbaa, + OrigLoad->getMetadata(LLVMContext::MD_tbaa)); Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp index 64e8d79..a21efce 100644 --- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -33,7 +33,9 @@ STATISTIC(NumMerged, "Number of global constants merged"); namespace { struct ConstantMerge : public ModulePass { static char ID; // Pass identification, replacement for typeid - ConstantMerge() : ModulePass(ID) {} + ConstantMerge() : ModulePass(ID) { + initializeConstantMergePass(*PassRegistry::getPassRegistry()); + } // run - For this pass, process all of the globals in the module, // eliminating duplicate constants. @@ -44,7 +46,7 @@ namespace { char ConstantMerge::ID = 0; INITIALIZE_PASS(ConstantMerge, "constmerge", - "Merge Duplicate Global Constants", false, false); + "Merge Duplicate Global Constants", false, false) ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } @@ -63,6 +65,18 @@ static void FindUsedValues(GlobalVariable *LLVMUsed, UsedValues.insert(GV); } +// True if A is better than B. 
+static bool IsBetterCannonical(const GlobalVariable &A, + const GlobalVariable &B) { + if (!A.hasLocalLinkage() && B.hasLocalLinkage()) + return true; + + if (A.hasLocalLinkage() && !B.hasLocalLinkage()) + return false; + + return A.hasUnnamedAddr(); +} + bool ConstantMerge::runOnModule(Module &M) { // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; @@ -83,44 +97,76 @@ bool ConstantMerge::runOnModule(Module &M) { // second level constants have initializers which point to the globals that // were just merged. while (1) { - // First pass: identify all globals that can be merged together, filling in - // the Replacements vector. We cannot do the replacement in this pass - // because doing so may cause initializers of other globals to be rewritten, - // invalidating the Constant* pointers in CMap. - // + + // First: Find the canonical constants others will be merged with. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; - + // If this GV is dead, remove it. GV->removeDeadConstantUsers(); if (GV->use_empty() && GV->hasLocalLinkage()) { GV->eraseFromParent(); continue; } - - // Only process constants with initializers in the default addres space. - if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || - GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() || + + // Only process constants with initializers in the default address space. + if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || GV->hasSection() || // Don't touch values marked with attribute(used). UsedGlobals.count(GV)) continue; - - - + Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. GlobalVariable *&Slot = CMap[Init]; - if (Slot == 0) { // Nope, add it to the map. + // If this is the first constant we find or if the old one is local, + // replace with the current one. If the current one is externally visible + // it cannot be replaced, but it can be the canonical constant we merge with. + if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) { Slot = GV; - } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! - // Make all uses of the duplicate constant use the canonical version. - Replacements.push_back(std::make_pair(GV, Slot)); } } + // Second: identify all globals that can be merged together, filling in + // the Replacements vector. We cannot do the replacement in this pass + // because doing so may cause initializers of other globals to be rewritten, + // invalidating the Constant* pointers in CMap. + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = GVI++; + + // Only process constants with initializers in the default address space. + if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || GV->hasSection() || + // Don't touch values marked with attribute(used). + UsedGlobals.count(GV)) + continue; + + // We can only replace constants with local linkage. + if (!GV->hasLocalLinkage()) + continue; + + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *Slot = CMap[Init]; + + if (!Slot || Slot == GV) + continue; + + if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr()) + continue; + + if (!GV->hasUnnamedAddr()) + Slot->setUnnamedAddr(false); + + // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot)); + } + if (Replacements.empty()) return MadeChange; CMap.clear(); @@ -133,6 +179,8 @@ bool ConstantMerge::runOnModule(Module &M) { Replacements[i].first->replaceAllUsesWith(Replacements[i].second); // Delete the global value from the module. + assert(Replacements[i].first->hasLocalLinkage() && + "Refusing to delete an externally visible global variable."); Replacements[i].first->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 47df235..b423221 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -39,7 +39,8 @@ using namespace llvm; STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); - +STATISTIC(NumArgumentsReplacedWithUndef, + "Number of unread args replaced with undef"); namespace { /// DAE - The dead argument elimination pass. /// @@ -126,7 +127,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - DAE() : ModulePass(ID) {} + DAE() : ModulePass(ID) { + initializeDAEPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); @@ -146,12 +149,13 @@ namespace { void PropagateLiveness(const RetOrArg &RA); bool RemoveDeadStuffFromFunction(Function *F); bool DeleteDeadVarargs(Function &Fn); + bool RemoveDeadArgumentsFromCallers(Function &Fn); }; } char DAE::ID = 0; -INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false); +INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false) namespace { /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but @@ -168,7 +172,7 @@ namespace { char DAH::ID = 0; INITIALIZE_PASS(DAH, "deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", - false, false); + false, false) /// createDeadArgEliminationPass - This pass removes arguments from functions /// which are not used by the body of the function. @@ -285,6 +289,55 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { return true; } +/// RemoveDeadArgumentsFromCallers - Checks if the given function has any +/// arguments that are unused, and changes the caller parameters to be undefined +/// instead. +bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) +{ + if (Fn.isDeclaration()) + return false; + + // Functions with local linkage should already have been handled. + if (Fn.hasLocalLinkage()) + return false; + + if (Fn.use_empty()) + return false; + + llvm::SmallVector<unsigned, 8> UnusedArgs; + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); + I != E; ++I) { + Argument *Arg = I; + + if (Arg->use_empty() && !Arg->hasByValAttr()) + UnusedArgs.push_back(Arg->getArgNo()); + } + + if (UnusedArgs.empty()) + return false; + + bool Changed = false; + + for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end(); + I != E; ++I) { + CallSite CS(*I); + if (!CS || !CS.isCallee(I)) + continue; + + // Now go through all unused args and replace them with "undef". + for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) { + unsigned ArgNo = UnusedArgs[I]; + + Value *Arg = CS.getArgument(ArgNo); + CS.setArgument(ArgNo, UndefValue::get(Arg->getType())); + ++NumArgumentsReplacedWithUndef; + Changed = true; + } + } + + return Changed; +} + /// Convenience function that returns the number of return values. 
It returns 0 /// for void functions and 1 for functions not returning a struct. It returns /// the number of struct elements for functions returning a struct. @@ -791,7 +844,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } else if (New->getType()->isVoidTy()) { // Our return value has uses, but they will get removed later on. // Replace by null for now. - Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + if (!Call->getType()->isX86_MMXTy()) + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); } else { assert(RetTy->isStructTy() && "Return type changed, but not into a void. The old return type" @@ -854,7 +908,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } else { // If this argument is dead, replace any uses of it with null constants // (these are guaranteed to become unused later on). - I->replaceAllUsesWith(Constant::getNullValue(I->getType())); + if (!I->getType()->isX86_MMXTy()) + I->replaceAllUsesWith(Constant::getNullValue(I->getType())); } // If we change the return value of the function we must rewrite any return @@ -935,5 +990,14 @@ bool DAE::runOnModule(Module &M) { Function *F = I++; Changed |= RemoveDeadStuffFromFunction(F); } + + // Finally, look for any unused parameters in functions with non-local + // linkage and replace the passed in parameters with undef. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function& F = *I; + + Changed |= RemoveDeadArgumentsFromCallers(F); + } + return Changed; } diff --git a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp index 5dc50c5..a509931 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -26,7 +26,9 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab"); namespace { struct DTE : public ModulePass { static char ID; // Pass identification, replacement for typeid - DTE() : ModulePass(ID) {} + DTE() : ModulePass(ID) { + initializeDTEPass(*PassRegistry::getPassRegistry()); + } // doPassInitialization - For this pass, it removes global symbol table // entries for primitive types. These are never used for linking in GCC and @@ -45,7 +47,10 @@ namespace { } char DTE::ID = 0; -INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false); +INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination", + false, false) +INITIALIZE_PASS_DEPENDENCY(FindUsedTypes) +INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false) ModulePass *llvm::createDeadTypeEliminationPass() { return new DTE(); diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp index 45c5fe7..9d432de 100644 --- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp @@ -50,24 +50,22 @@ namespace { // Visit the GlobalVariables. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration()) { - if (I->hasLocalLinkage()) - I->setVisibility(GlobalValue::HiddenVisibility); - I->setLinkage(GlobalValue::ExternalLinkage); - if (deleteStuff == Named.count(I)) - I->setInitializer(0); - } + I != E; ++I) { + if (I->hasLocalLinkage()) + I->setVisibility(GlobalValue::HiddenVisibility); + I->setLinkage(GlobalValue::ExternalLinkage); + if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) + I->setInitializer(0); + } // Visit the Functions. 
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration()) { - if (I->hasLocalLinkage()) - I->setVisibility(GlobalValue::HiddenVisibility); - I->setLinkage(GlobalValue::ExternalLinkage); - if (deleteStuff == Named.count(I)) - I->deleteBody(); - } + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->hasLocalLinkage()) + I->setVisibility(GlobalValue::HiddenVisibility); + I->setLinkage(GlobalValue::ExternalLinkage); + if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) + I->deleteBody(); + } return true; } diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 6165ba0..95decec 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -23,10 +23,10 @@ #include "llvm/CallGraphSCCPass.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" @@ -41,7 +41,9 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias"); namespace { struct FunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID) {} + FunctionAttrs() : CallGraphSCCPass(ID), AA(0) { + initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } // runOnSCC - Analyze the SCC, performing the transformation if possible. bool runOnSCC(CallGraphSCC &SCC); @@ -61,67 +63,25 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); CallGraphSCCPass::getAnalysisUsage(AU); } - bool PointsToLocalMemory(Value *V); + private: + AliasAnalysis *AA; }; } char FunctionAttrs::ID = 0; -INITIALIZE_PASS(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false); +INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } -/// PointsToLocalMemory - Returns whether the given pointer value points to -/// memory that is local to the function. Global constants are considered -/// local to all functions. -bool FunctionAttrs::PointsToLocalMemory(Value *V) { - SmallVector<Value*, 16> Worklist; - unsigned MaxLookup = 8; - - Worklist.push_back(V); - - do { - V = Worklist.pop_back_val()->getUnderlyingObject(); - - // An alloca instruction defines local memory. - if (isa<AllocaInst>(V)) - continue; - - // A global constant counts as local memory for our purposes. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { - if (!GV->isConstant()) - return false; - continue; - } - - // If both select values point to local memory, then so does the select. - if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - Worklist.push_back(SI->getTrueValue()); - Worklist.push_back(SI->getFalseValue()); - continue; - } - - // If all values incoming to a phi node point to local memory, then so does - // the phi. - if (PHINode *PN = dyn_cast<PHINode>(V)) { - // Don't bother inspecting phi nodes with many operands. 
- if (PN->getNumIncomingValues() > MaxLookup) - return false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - Worklist.push_back(PN->getIncomingValue(i)); - continue; - } - - return false; - } while (!Worklist.empty() && --MaxLookup); - - return Worklist.empty(); -} - /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Function*, 8> SCCNodes; @@ -141,14 +101,15 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // External node - may write memory. Just give up. return false; - if (F->doesNotAccessMemory()) + AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F); + if (MRB == AliasAnalysis::DoesNotAccessMemory) // Already perfect! continue; // Definitions with weak linkage may be overridden at linktime with // something that writes memory, so treat them like declarations. if (F->isDeclaration() || F->mayBeOverridden()) { - if (!F->onlyReadsMemory()) + if (!AliasAnalysis::onlyReadsMemory(MRB)) // May write memory. Just give up. return false; @@ -163,32 +124,62 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. CallSite CS(cast<Value>(I)); - if (CS && CS.getCalledFunction()) { + if (CS) { // Ignore calls to functions in the same SCC. - if (SCCNodes.count(CS.getCalledFunction())) + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) continue; - // Ignore intrinsics that only access local memory. - if (unsigned id = CS.getCalledFunction()->getIntrinsicID()) - if (AliasAnalysis::getIntrinsicModRefBehavior(id) == - AliasAnalysis::AccessesArguments) { - // Check that all pointer arguments point to local memory. + AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS); + // If the call doesn't access arbitrary memory, we may be able to + // figure out something. + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + // If the call does access argument pointees, check each argument. + if (AliasAnalysis::doesAccessArgPointees(MRB)) + // Check whether all pointer arguments point to local memory, and + // ignore calls that only access local memory. for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI) { Value *Arg = *CI; - if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg)) - // Writes memory. Just give up. - return false; + if (Arg->getType()->isPointerTy()) { + AliasAnalysis::Location Loc(Arg, + AliasAnalysis::UnknownSize, + I->getMetadata(LLVMContext::MD_tbaa)); + if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) { + if (MRB & AliasAnalysis::Mod) + // Writes non-local memory. Give up. + return false; + if (MRB & AliasAnalysis::Ref) + // Ok, it reads non-local memory. + ReadsMemory = true; + } + } } - // Only reads and writes local memory. - continue; - } - } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - // Ignore loads from local memory. - if (PointsToLocalMemory(LI->getPointerOperand())) continue; + } + // The call could access any memory. If that includes writes, give up. + if (MRB & AliasAnalysis::Mod) + return false; + // If it reads, note it. + if (MRB & AliasAnalysis::Ref) + ReadsMemory = true; + continue; + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore non-volatile loads from local memory. 
+ if (!LI->isVolatile()) { + AliasAnalysis::Location Loc = AA->getLocation(LI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - // Ignore stores to local memory. - if (PointsToLocalMemory(SI->getPointerOperand())) + // Ignore non-volatile stores to local memory. + if (!SI->isVolatile()) { + AliasAnalysis::Location Loc = AA->getLocation(SI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { + // Ignore vaargs on local memory. + AliasAnalysis::Location Loc = AA->getLocation(VI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } @@ -198,10 +189,6 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // Writes memory. Just give up. return false; - if (isMalloc(I)) - // malloc claims not to write memory! PR3754. - return false; - // If this instruction may read memory, remember that. ReadsMemory |= I->mayReadFromMemory(); } @@ -384,6 +371,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { } bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { + AA = &getAnalysis<AliasAnalysis>(); + bool Changed = AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp index aa18601..2b427aa 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -31,7 +31,9 @@ STATISTIC(NumVariables, "Number of global variables removed"); namespace { struct GlobalDCE : public ModulePass { static char ID; // Pass identification, replacement for typeid - GlobalDCE() : ModulePass(ID) {} + GlobalDCE() : ModulePass(ID) { + initializeGlobalDCEPass(*PassRegistry::getPassRegistry()); + } // run - Do the GlobalDCE pass on the specified module, optionally updating // the specified callgraph to reflect the changes. 
@@ -52,7 +54,7 @@ namespace { char GlobalDCE::ID = 0; INITIALIZE_PASS(GlobalDCE, "globaldce", - "Dead Global Elimination", false, false); + "Dead Global Elimination", false, false) ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index a77af54..d4cb712 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -40,6 +40,7 @@ using namespace llvm; STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); @@ -55,11 +56,14 @@ STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); namespace { + struct GlobalStatus; struct GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - GlobalOpt() : ModulePass(ID) {} + GlobalOpt() : ModulePass(ID) { + initializeGlobalOptPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); @@ -69,13 +73,16 @@ namespace { bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); - bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, + const SmallPtrSet<const PHINode*, 16> &PHIUsers, + const GlobalStatus &GS); }; } char GlobalOpt::ID = 0; INITIALIZE_PASS(GlobalOpt, "globalopt", - "Global Variable Optimizer", false, false); + "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } @@ -85,6 +92,9 @@ namespace { /// about it. If we find out that the address of the global is taken, none of /// this info will be accurate. struct GlobalStatus { + /// isCompared - True if the global's address is used in a comparison. + bool isCompared; + /// isLoaded - True if the global is ever loaded. If the global isn't ever /// loaded it can be deleted. bool isLoaded; @@ -129,10 +139,11 @@ struct GlobalStatus { /// HasPHIUser - Set to true if this global has a user that is a PHI node. bool HasPHIUser; - - GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), - AccessingFunction(0), HasMultipleAccessingFunctions(false), - HasNonInstructionUser(false), HasPHIUser(false) {} + + GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + HasPHIUser(false) {} }; } @@ -165,6 +176,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, const User *U = *UI; if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { GS.HasNonInstructionUser = true; + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. 
+ if (!isa<PointerType>(CE->getType())) return true; + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(U)) { if (!GS.HasMultipleAccessingFunctions) { @@ -221,7 +237,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa<CmpInst>(I)) { - // Nothing to analyse. + GS.isCompared = true; } else if (isa<MemTransferInst>(I)) { const MemTransferInst *MTI = cast<MemTransferInst>(I); if (MTI->getArgOperand(0) == V) @@ -308,7 +324,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit); - } else if (CE->getOpcode() == Instruction::BitCast && + } else if (CE->getOpcode() == Instruction::BitCast && CE->getType()->isPointerTy()) { // Pointer cast, delete any stores and memsets to the global. Changed |= CleanupConstantGlobalUsers(CE, 0); @@ -324,7 +340,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { // and will invalidate our notion of what Init is. Constant *SubInit = 0; if (!isa<ConstantExpr>(GEP->getOperand(0))) { - ConstantExpr *CE = + ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -361,7 +377,7 @@ static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast<Constant>(V)) return SafeToDestroyConstant(C); - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; @@ -371,15 +387,15 @@ static bool isSafeSROAElementUse(Value *V) { // Stores *to* the pointer are ok. if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getOperand(0) != V; - + // Otherwise, it must be a GEP. GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); if (GEPI == 0) return false; - + if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || !cast<Constant>(GEPI->getOperand(1))->isNullValue()) return false; - + for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); I != E; ++I) if (!isSafeSROAElementUse(*I)) @@ -393,11 +409,11 @@ static bool isSafeSROAElementUse(Value *V) { /// static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // The user of the global must be a GEP Inst or a ConstantExpr GEP. - if (!isa<GetElementPtrInst>(U) && - (!isa<ConstantExpr>(U) || + if (!isa<GetElementPtrInst>(U) && + (!isa<ConstantExpr>(U) || cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) return false; - + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we // don't like < 3 operand CE's, and we don't like non-constant integer // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some @@ -409,18 +425,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); ++GEPI; // Skip over the pointer index. - + // If this is a use of an array allocation, do a bit more checking for sanity. if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { uint64_t NumElements = AT->getNumElements(); ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); - + // Check to make sure that index falls within the array. If not, // something funny is going on, so we won't do the optimization. 
// if (Idx->getZExtValue() >= NumElements) return false; - + // We cannot scalar repl this level of the array unless any array // sub-indices are in-range constants. In particular, consider: // A[0][i]. We cannot know that the user isn't doing invalid things like @@ -441,7 +457,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { "Indexed GEP type is not array, vector, or struct!"); continue; } - + ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); if (!IdxVal || IdxVal->getZExtValue() >= NumElements) return false; @@ -465,7 +481,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { } return true; } - + /// SRAGlobal - Perform scalar replacement of aggregates on the specified global /// variable. This opens the door for other optimizations by exposing the @@ -476,7 +492,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) return 0; - + assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); const Type *Ty = Init->getType(); @@ -488,7 +504,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { unsigned StartAlignment = GV->getAlignment(); if (StartAlignment == 0) StartAlignment = TD.getABITypeAlignment(GV->getType()); - + if (const StructType *STy = dyn_cast<StructType>(Ty)) { NewGlobals.reserve(STy->getNumElements()); const StructLayout &Layout = *TD.getStructLayout(STy); @@ -503,7 +519,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -522,7 +538,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NumElements > 16 && GV->hasNUsesOrMore(16)) return 0; // It's not worth it. NewGlobals.reserve(NumElements); - + uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { @@ -537,7 +553,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -549,7 +565,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -615,7 +631,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { } /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified -/// value will trap if the value is dynamically null. PHIs keeps track of any +/// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. 
static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSet<const PHINode*, 8> &PHIs) { @@ -757,7 +773,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { // Keep track of whether we are able to remove all the uses of the global // other than the store that defines it. bool AllNonStoreUsesGone = true; - + // Replace all uses of loads with uses of uses of the stored value. for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; @@ -830,7 +846,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, ConstantInt *NElements, TargetData* TD) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - + const Type *GlobalType; if (NElements->getZExtValue() == 1) GlobalType = AllocTy; @@ -840,14 +856,14 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, UndefValue::get(GlobalType), GV->getName()+".body", GV, GV->isThreadLocal()); - + // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update // other users to use the global as well. @@ -867,10 +883,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, User->replaceUsesOfWith(CI, TheBC); } } - + Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, + RepValue = ConstantExpr::getBitCast(RepValue, GV->getType()->getElementType()); // If there is a comparison against null, we will insert a global bool to @@ -890,7 +906,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, SI->eraseFromParent(); continue; } - + LoadInst *LI = cast<LoadInst>(GV->use_back()); while (!LI->use_empty()) { Use &LoadUse = LI->use_begin().getUse(); @@ -898,7 +914,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, LoadUse = RepValue; continue; } - + ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); // Replace the cmp X, 0 with a use of the bool value. Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); @@ -963,20 +979,20 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { continue; // Fine, ignore. } - + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. } - + // Must index into the array and into the struct. if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) return false; continue; } - + if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. 
@@ -985,13 +1001,13 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, return false; continue; } - + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; } - + return false; } return true; @@ -1000,9 +1016,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV /// somewhere. Transform all uses of the allocation into loads from the /// global and uses of the resultant pointer. Further, delete the store into -/// GV. This assumes that these value pass the +/// GV. This assumes that these value pass the /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. -static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { while (!Alloc->use_empty()) { Instruction *U = cast<Instruction>(*Alloc->use_begin()); @@ -1035,7 +1051,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, continue; } } - + // Insert a load from the global, and use it instead of the malloc. Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); U->replaceUsesOfWith(Alloc, NL); @@ -1053,24 +1069,24 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { const Instruction *User = cast<Instruction>(*UI); - + // Comparison against null is ok. if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) return false; continue; } - + // getelementptr is also ok, but only a simple form. if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { // Must index into the array and into the struct. if (GEPI->getNumOperands() < 3) return false; - + // Otherwise the GEP is ok. continue; } - + if (const PHINode *PN = dyn_cast<PHINode>(User)) { if (!LoadUsingPHIsPerLoad.insert(PN)) // This means some phi nodes are dependent on each other. @@ -1079,19 +1095,19 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, if (!LoadUsingPHIs.insert(PN)) // If we have already analyzed this PHI, then it is safe. continue; - + // Make sure all uses of the PHI are simple enough to transform. if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; - + continue; } - + // Otherwise we don't know what this is, not ok. return false; } - + return true; } @@ -1110,10 +1126,10 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, return false; LoadUsingPHIsPerLoad.clear(); } - + // If we reach here, we know that all uses of the loads and transitive uses // (through PHI nodes) are simple enough to transform. However, we don't know - // that all inputs the to the PHI nodes are in the same equivalence sets. + // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() @@ -1121,29 +1137,29 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); - + // PHI of the stored value itself is ok. 
if (InVal == StoredVal) continue; - + if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) { // One of the PHIs in our set is (optimistically) ok. if (LoadUsingPHIs.count(InPN)) continue; return false; } - + // Load from GV is ok. if (const LoadInst *LI = dyn_cast<LoadInst>(InVal)) if (LI->getOperand(0) == GV) continue; - + // UNDEF? NULL? - + // Anything else is rejected. return false; } } - + return true; } @@ -1151,15 +1167,15 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; - + if (FieldNo >= FieldVals.size()) FieldVals.resize(FieldNo+1); - + // If we already have this value, just reuse the previously scalarized // version. if (Value *FieldVal = FieldVals[FieldNo]) return FieldVal; - + // Depending on what instruction this is, we have several cases. Value *Result; if (LoadInst *LI = dyn_cast<LoadInst>(V)) { @@ -1172,9 +1188,9 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. - const StructType *ST = + const StructType *ST = cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); - + Result = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), PN->getName()+".f"+Twine(FieldNo), PN); @@ -1183,13 +1199,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, llvm_unreachable("Unknown usable value"); Result = 0; } - + return FieldVals[FieldNo] = Result; } /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from /// the load, rewrite the derived value to use the HeapSRoA'd load. -static void RewriteHeapSROALoadUser(Instruction *LoadUser, +static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { // If this is a comparison against null, handle it. @@ -1199,30 +1215,30 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, InsertedScalarizedValues, PHIsToRewrite); - + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, - Constant::getNullValue(NPtr->getType()), + Constant::getNullValue(NPtr->getType()), SCI->getName()); SCI->replaceAllUsesWith(New); SCI->eraseFromParent(); return; } - + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) && "Unexpected GEPI!"); - + // Load the pointer for this field. unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, InsertedScalarizedValues, PHIsToRewrite); - + // Create the new GEP idx vector. SmallVector<Value*, 8> GEPIdx; GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - + Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx.begin(), GEPIdx.end(), GEPI->getName(), GEPI); @@ -1243,7 +1259,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, tie(InsertPos, Inserted) = InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); if (!Inserted) return; - + // If this is the first time we've seen this PHI, recursively process all // users. 
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { @@ -1256,7 +1272,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// is a value loaded from the global. Eliminate all uses of Ptr, making them /// use FieldGlobals instead. All uses of loaded values satisfy /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. -static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); @@ -1264,7 +1280,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, Instruction *User = cast<Instruction>(*UI++); RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } - + if (Load->use_empty()) { Load->eraseFromParent(); InsertedScalarizedValues.erase(Load); @@ -1289,11 +1305,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // new mallocs at the same place as CI, and N globals. std::vector<Value*> FieldGlobals; std::vector<Value*> FieldMallocs; - + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ const Type *FieldTy = STy->getElementType(FieldNo); const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); - + GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, @@ -1301,7 +1317,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, GV->getName() + ".f" + Twine(FieldNo), GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); if (const StructType *ST = dyn_cast<StructType>(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); @@ -1313,7 +1329,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); } - + // The tricky aspect of this transformation is handling the case when malloc // fails. In the original code, malloc failing would set the result pointer // of malloc to null. In this case, some mallocs could succeed and others @@ -1340,23 +1356,23 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Split the basic block at the old malloc. BasicBlock *OrigBB = CI->getParent(); BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); - + // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), "malloc_ret_null", OrigBB->getParent()); - + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond // branch on RunningOr. OrigBB->getTerminator()->eraseFromParent(); BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - + // Within the NullPtrBlock, we need to emit a comparison and branch for each // pointer, because some may be null while others are not. 
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, Constant::getNullValue(GVVal->getType()), "tmp"); BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", @@ -1371,10 +1387,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], FreeBlock); BranchInst::Create(NextBlock, FreeBlock); - + NullPtrBlock = NextBlock; } - + BranchInst::Create(ContBB, NullPtrBlock); // CI is no longer needed, remove it. @@ -1385,25 +1401,25 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// inserted for a given load. DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; - + std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; - + // Okay, the malloc site is completely handled. All of the uses of GV are now // loads, and all uses of those loads are simple. Rewrite them to use loads // of the per-field globals instead. for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { Instruction *User = cast<Instruction>(*UI++); - + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); continue; } - + // Must be a store of null. StoreInst *SI = cast<StoreInst>(User); assert(isa<ConstantPointerNull>(SI->getOperand(0)) && "Unexpected heap-sra user!"); - + // Insert a store of null into each global. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); @@ -1430,7 +1446,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } - + // Drop all inter-phi links and any loads that made it this far. for (DenseMap<Value*, std::vector<Value*> >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1440,7 +1456,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) LI->dropAllReferences(); } - + // Delete all the phis and loads now that inter-references are dead. for (DenseMap<Value*, std::vector<Value*> >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1450,7 +1466,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) LI->eraseFromParent(); } - + // The old global is now dead, remove it. GV->eraseFromParent(); @@ -1468,7 +1484,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, TargetData *TD) { if (!TD) return false; - + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1508,7 +1524,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); return true; } - + // If the allocation is an array of structures, consider transforming this // into multiple malloc'd arrays, one for each field. This is basically // SRoA for malloc'd memory. @@ -1544,13 +1560,13 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CI = dyn_cast<BitCastInst>(Malloc) ? 
extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); } - + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); return true; } - + return false; -} +} // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge // that only one value (besides its initializer) is ever stored to the global. @@ -1568,7 +1584,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) - SOVC = + SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. @@ -1576,7 +1592,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { const Type* MallocType = getMallocAllocatedType(CI); - if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, GVI, TD)) return true; } @@ -1591,7 +1607,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// whenever it is used. This exposes the values to other scalar optimizations. static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { const Type *GVElType = GV->getType()->getElementType(); - + // If GVElType is already i1, it is already shrunk. If the type of the GV is // an FP value, pointer or vector, don't do this optimization because a select // between them is very expensive and unlikely to lead to later @@ -1611,11 +1627,11 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); - + // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, - GlobalValue::InternalLinkage, + GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", GV->isThreadLocal()); @@ -1684,10 +1700,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { /// ProcessInternalGlobal - Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, - Module::global_iterator &GVI) { - SmallPtrSet<const PHINode*, 16> PHIUsers; - GlobalStatus GS; +bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, + Module::global_iterator &GVI) { + if (!GV->hasLocalLinkage()) + return false; + + // Do more involved optimizations if the global is internal. 
GV->removeDeadConstantUsers(); if (GV->use_empty()) { @@ -1697,140 +1715,139 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } - if (!AnalyzeGlobal(GV, GS, PHIUsers)) { -#if 0 - DEBUG(dbgs() << "Global: " << *GV); - DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); - DEBUG(dbgs() << " StoredType = "); - switch (GS.StoredType) { - case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; - case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); - break; - case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; - case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; - } - if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); - if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(dbgs() << " AccessingFunction = " - << GS.AccessingFunction->getName() << "\n"); - DEBUG(dbgs() << " HasMultipleAccessingFunctions = " - << GS.HasMultipleAccessingFunctions << "\n"); - DEBUG(dbgs() << " HasNonInstructionUser = " - << GS.HasNonInstructionUser<<"\n"); - DEBUG(dbgs() << "\n"); -#endif - - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive we can make - // this global a local variable) we replace the global with a local alloca - // in this function. - // - // NOTE: It doesn't make sense to promote non single-value types since we - // are just replacing static memory to stack memory. - // - // If the global is in different address space, don't bring it to stack. - if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && - GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); - Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction - ->getEntryBlock().begin()); - const Type* ElemTy = GV->getType()->getElementType(); - // FIXME: Pass Global's alignment when globals have alignment - AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); - if (!isa<UndefValue>(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, &FirstI); - - GV->replaceAllUsesWith(Alloca); + SmallPtrSet<const PHINode*, 16> PHIUsers; + GlobalStatus GS; + + if (AnalyzeGlobal(GV, GS, PHIUsers)) + return false; + + if (!GS.isCompared && !GV->hasUnnamedAddr()) { + GV->setUnnamedAddr(true); + NumUnnamed++; + } + + if (GV->isConstant() || !GV->hasInitializer()) + return false; + + return ProcessInternalGlobal(GV, GVI, PHIUsers, GS); +} + +/// ProcessInternalGlobal - Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. +bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, + Module::global_iterator &GVI, + const SmallPtrSet<const PHINode*, 16> &PHIUsers, + const GlobalStatus &GS) { + // If this is a first class global and has only one accessing function + // and this function is main (which we know is not recursive we can make + // this global a local variable) we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non single-value types since we + // are just replacing static memory to stack memory. + // + // If the global is in different address space, don't bring it to stack. 
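//
// Illustrative IR (editor's sketch, not part of this commit) for the
// localization performed below:
//
//   @counter = internal global i32 5    ; accessed only from main()
//
// becomes, at the top of main's entry block:
//
//   %counter = alloca i32
//   store i32 5, i32* %counter          ; omitted if the initializer is undef
//
// Since main runs exactly once and is not recursive, the stack slot has the
// same lifetime as the global, and mem2reg/SROA can then promote it to SSA
// registers.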
+ if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && !GS.HasNonInstructionUser && + GV->getType()->getElementType()->isSingleValueType() && + GS.AccessingFunction->getName() == "main" && + GS.AccessingFunction->hasExternalLinkage() && + GV->getType()->getAddressSpace() == 0) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction + ->getEntryBlock().begin()); + const Type* ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + if (!isa<UndefValue>(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.isLoaded) { + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, delete it. + if (GV->use_empty()) { GV->eraseFromParent(); - ++NumLocalized; - return true; + ++NumDeleted; + Changed = true; } - - // If the global is never loaded (but may be stored to), it is dead. - // Delete it now. - if (!GS.isLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); - - // Delete any stores we can find to the global. We may not be able to - // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - // If the global is dead now, delete it. - if (GV->use_empty()) { - GV->eraseFromParent(); - ++NumDeleted; - Changed = true; - } - return Changed; + return Changed; - } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); - GV->setConstant(true); + } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); + GV->setConstant(true); - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); - // If the global is dead now, just nuke it. - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Marking constant allowed us to simplify " - << "all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; + // If the global is dead now, just nuke it. + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; + } + } else if (GS.StoredType == GlobalStatus::isStoredOnce) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be the stored value, then delete all stores to the + // global. This allows us to mark it constant. 
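//
// Illustrative IR (editor's sketch, not part of this commit):
//
//   @x = internal global i32 undef
//   ...
//   store i32 42, i32* @x               ; the only store, and a constant
//
// can have its initializer replaced to give
//
//   @x = internal global i32 42         ; the store is now redundant
//
// after which all stores are deleted and, with no writers left, the global
// can be folded into its users or removed outright.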
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (isa<UndefValue>(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } else { + GVI = GV; + } + ++NumSubstitute; + return true; } - ++NumMarked; + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. + if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, + getAnalysisIfAvailable<TargetData>())) return true; - } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } - } else if (GS.StoredType == GlobalStatus::isStoredOnce) { - // If the initial value for the global was an undef value, and if only - // one other value was stored into it, we can just change the - // initializer to be the stored value, then delete all stores to the - // global. This allows us to mark it constant. - if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) - if (isa<UndefValue>(GV->getInitializer())) { - // Change the initial value here. - GV->setInitializer(SOVConstant); - - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; - } else { - GVI = GV; - } - ++NumSubstitute; - return true; - } - // Try to optimize globals based on the knowledge that only one value - // (besides its initializer) is ever stored to the global. - if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysisIfAvailable<TargetData>())) + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; return true; - - // Otherwise, if the global was not a boolean, we can shrink it to be a - // boolean. - if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { - ++NumShrunkToBool; - return true; - } - } + } } + return false; } @@ -1917,10 +1934,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { if (New && New != CE) GV->setInitializer(New); } - // Do more involved optimizations if the global is internal. - if (!GV->isConstant() && GV->hasLocalLinkage() && - GV->hasInitializer()) - Changed |= ProcessInternalGlobal(GV, GVI); + + Changed |= ProcessGlobal(GV, GVI); } return Changed; } @@ -1928,46 +1943,47 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { /// FindGlobalCtors - Find the llvm.globalctors list, verifying that all /// initializers have an init priority of 65535. GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (I->getName() == "llvm.global_ctors") { - // Found it, verify it's an array of { int, void()* }. 
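//
// Illustrative IR (editor's sketch, not part of this commit): the only
// accepted shape is
//
//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
//     [{ i32, void ()* } { i32 65535, void ()* @init }]
//
// Null function pointers are tolerated and skipped, but a priority other
// than 65535, a vararg or non-void constructor type, or a non-unique
// initializer makes this routine reject the whole list by returning 0.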
- const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); - if (!ATy) return 0; - const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); - if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isIntegerTy(32)) return 0; - const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); - if (!PFTy) return 0; - const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); - if (!FTy || !FTy->getReturnType()->isVoidTy() || - FTy->isVarArg() || FTy->getNumParams() != 0) - return 0; - - // Verify that the initializer is simple enough for us to handle. - if (!I->hasDefinitiveInitializer()) return 0; - ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); - if (!CA) return 0; - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) { - if (isa<ConstantPointerNull>(CS->getOperand(1))) - continue; + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (GV == 0) return 0; + + // Found it, verify it's an array of { int, void()* }. + const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType()); + if (!ATy) return 0; + const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); + if (!STy || STy->getNumElements() != 2 || + !STy->getElementType(0)->isIntegerTy(32)) return 0; + const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); + if (!PFTy) return 0; + const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); + if (!FTy || !FTy->getReturnType()->isVoidTy() || + FTy->isVarArg() || FTy->getNumParams() != 0) + return 0; - // Must have a function or null ptr. - if (!isa<Function>(CS->getOperand(1))) - return 0; - - // Init priority must be standard. - ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); - if (!CI || CI->getZExtValue() != 65535) - return 0; - } else { - return 0; - } - - return I; - } - return 0; + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) return 0; + + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!CA) return 0; + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + ConstantStruct *CS = dyn_cast<ConstantStruct>(*i); + if (CS == 0) return 0; + + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return 0; + + // Init priority must be standard. + ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!CI || CI->getZExtValue() != 65535) + return 0; + } + + return GV; } /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, @@ -1985,13 +2001,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. -static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, +static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const std::vector<Function*> &Ctors) { // If we made a change, reassemble the initializer list. std::vector<Constant*> CSVals; CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); CSVals.push_back(0); - + // Create the new init list. 
std::vector<Constant*> CAList; for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { @@ -2007,26 +2023,26 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); } - + // Create the array initializer. const Type *StructTy = cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); - + // If we didn't change the number of elements, don't create a new GV. if (CA->getType() == GCL->getInitializer()->getType()) { GCL->setInitializer(CA); return GCL; } - + // Create the new global and insert it next to the existing list. GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); - + // Nuke the old list, replacing any uses with the new one. if (!GCL->use_empty()) { Constant *V = NGV; @@ -2035,7 +2051,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, GCL->replaceAllUsesWith(V); } GCL->eraseFromParent(); - + if (Ctors.size()) return NGV; else @@ -2043,17 +2059,86 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } -static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, - Value *V) { +static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) { if (Constant *CV = dyn_cast<Constant>(V)) return CV; Constant *R = ComputedValues[V]; assert(R && "Reference to an uncomputed value!"); return R; } +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants); + + +/// isSimpleEnoughValueToCommit - Return true if the specified constant can be +/// handled by the code generator. We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants) { + // Simple integer, undef, constant aggregate zero, global addresses, etc are + // all supported. + if (C->getNumOperands() == 0 || isa<BlockAddress>(C) || + isa<GlobalValue>(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || + isa<ConstantVector>(C)) { + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + Constant *Op = cast<Constant>(C->getOperand(i)); + if (!isSimpleEnoughValueToCommit(Op, SimpleConstants)) + return false; + } + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast<ConstantExpr>(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // These casts are always fine if the casted value is. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + + // GEP is fine if it is simple + constant offset. 
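//
// Illustrative constants (editor's sketch, not part of this commit) showing
// what this switch admits:
//
//   getelementptr (i8* @g, i64 8)                   ; ok: global + const offset
//   ptrtoint (i8* @g to i64)                        ; ok: plain cast of global
//   add (i64 ptrtoint (i8* @g to i64), i64 16)      ; ok: simple + constant
//   udiv (i64 ptrtoint (i8* @g to i64), i64 42)     ; rejected: no relocation
//                                                   ; can express a division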
+ case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + + case Instruction::Add: + // We allow simple+cst. + if (!isa<ConstantInt>(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C)) return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants); +} + + /// isSimpleEnoughPointerToCommit - Return true if this constant is simple -/// enough for us to understand. In particular, if it is a cast of something, -/// we punt. We basically just support direct accesses to globals and GEP's of +/// enough for us to understand. In particular, if it is a cast to anything +/// other than from one pointer type to another pointer type, we punt. +/// We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't @@ -2062,19 +2147,19 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - return GV->hasDefinitiveInitializer(); + return GV->hasUniqueInitializer(); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { // Handle a constantexpr gep. if (CE->getOpcode() == Instruction::GetElementPtr && isa<GlobalVariable>(CE->getOperand(0)) && cast<GEPOperator>(CE)->isInBounds()) { GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - if (!GV->hasDefinitiveInitializer()) + if (!GV->hasUniqueInitializer()) return false; // The first index must be zero. @@ -2087,7 +2172,18 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa<GlobalVariable>(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); } + } + return false; } @@ -2101,7 +2197,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, assert(Val->getType() == Init->getType() && "Type mismatch!"); return Val; } - + std::vector<Constant*> Elts; if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { @@ -2119,13 +2215,13 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, llvm_unreachable("This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); } - + // Replace the element that we are supposed to. 
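//
// Illustrative case (editor's sketch, not part of this commit): storing
// i32 7 through getelementptr (@s, i32 0, i32 1) into
//
//   @s = global { i32, i32 } { i32 1, i32 2 }
//
// recurses to the last index, rewrites Elts[1], and rebuilds the initializer
// as { i32 1, i32 7 }; arrays and vectors are rebuilt the same way below.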
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); - + // Return the modified struct. return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), STy->isPacked()); @@ -2138,8 +2234,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, NumElts = ATy->getNumElements(); else NumElts = cast<VectorType>(InitTy)->getNumElements(); - - + + // Break up the array into elements. if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) @@ -2154,16 +2250,15 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, " ConstantFoldLoadThroughGEPConstantExpr"); Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); } - + assert(CI->getZExtValue() < NumElts); Elts[CI->getZExtValue()] = EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - + if (Init->getType()->isArrayTy()) return ConstantArray::get(cast<ArrayType>(InitTy), Elts); - else - return ConstantVector::get(&Elts[0], Elts.size()); - } + return ConstantVector::get(Elts); + } } /// CommitValueTo - We have decided that Addr (which satisfies the predicate @@ -2189,14 +2284,14 @@ static Constant *ComputeLoadResult(Constant *P, // is the most up-to-date. DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); if (I != Memory.end()) return I->second; - + // Access it. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); return 0; } - + // Handle a constantexpr getelementptr. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) if (CE->getOpcode() == Instruction::GetElementPtr && @@ -2216,17 +2311,19 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl<Constant*> &ActualArgs, std::vector<Function*> &CallStack, DenseMap<Constant*, Constant*> &MutatedMemory, - std::vector<GlobalVariable*> &AllocaTmps) { + std::vector<GlobalVariable*> &AllocaTmps, + SmallPtrSet<Constant*, 8> &SimpleConstants, + const TargetData *TD) { // Check to see if this function is already executing (recursion). If so, // bail out. TODO: we might want to accept limited recursion. if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) return false; - + CallStack.push_back(F); - + /// Values - As we compute SSA register values, we store their contents here. DenseMap<Value*, Constant*> Values; - + // Initialize arguments to the incoming values specified. unsigned ArgNo = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; @@ -2237,21 +2334,65 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, /// we can only evaluate any one basic block at most once. This set keeps /// track of what we have executed so we can detect recursive cases etc. SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; - + // CurInst - The current instruction we're evaluating. BasicBlock::iterator CurInst = F->begin()->begin(); - + // This is the main evaluation loop. while (1) { Constant *InstResult = 0; - + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { if (SI->isVolatile()) return false; // no volatile accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. 
return false; + Constant *Val = getVal(Values, SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants)) + return false; + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + if (CE->getOpcode() == Instruction::BitCast) { + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. + Ptr = CE->getOperand(0); + + const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. + while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (const StructType *STy = dyn_cast<StructType>(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2); + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. + } else { + return 0; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + } + MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), @@ -2290,7 +2431,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); - InstResult = AllocaTmps.back(); + InstResult = AllocaTmps.back(); } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { // Debug info can safely be ignored here. @@ -2324,11 +2465,11 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else { if (Callee->getFunctionType()->isVarArg()) return false; - + Constant *RetVal; // Execute the call, if successful, use the return value. if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, - MutatedMemory, AllocaTmps)) + MutatedMemory, AllocaTmps, SimpleConstants, TD)) return false; InstResult = RetVal; } @@ -2342,7 +2483,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); if (!Cond) return false; // Cannot determine. - NewBB = BI->getSuccessor(!Cond->getZExtValue()); + NewBB = BI->getSuccessor(!Cond->getZExtValue()); } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { ConstantInt *Val = @@ -2358,20 +2499,20 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { if (RI->getNumOperands()) RetVal = getVal(Values, RI->getOperand(0)); - + CallStack.pop_back(); // return from fn. return true; // We succeeded at evaluating this ctor! } else { // invoke, unwind, unreachable. return false; // Cannot handle this terminator. } - + // Okay, we succeeded in evaluating this control flow. See if we have // executed the new block before. If so, we have a looping function, // which we cannot evaluate in reasonable time. 
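//
// Editor's note (not part of this commit): because every block may execute
// at most once, even a trivially bounded loop such as
//
//   for (int i = 0; i != 2; ++i) g += i;
//
// re-enters its body block and forces the evaluator to give up. Only
// constructors whose control flow folds to a straight-line path (possibly
// through branches on known constants) can be fully evaluated.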
if (!ExecutedBlocks.insert(NewBB)) return false; // looped! - + // Okay, we have never been in this block before. Check to see if there // are any PHI nodes. If so, evaluate them with information about where // we came from. @@ -2387,10 +2528,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, // Did not know how to evaluate this! return false; } - - if (!CurInst->use_empty()) + + if (!CurInst->use_empty()) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) + InstResult = ConstantFoldConstantExpression(CE, TD); + Values[CurInst] = InstResult; - + } + // Advance program counter. ++CurInst; } @@ -2398,7 +2543,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F) { +static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { /// MutatedMemory - For each store we execute, we update this map. Loads /// check this to get the most up-to-date value. If evaluation is successful, /// this state is committed to the process. @@ -2408,17 +2553,23 @@ static bool EvaluateStaticConstructor(Function *F) { /// to represent its body. This vector is needed so we can delete the /// temporary globals when we are done. std::vector<GlobalVariable*> AllocaTmps; - + /// CallStack - This is used to detect recursion. In pathological situations /// we could hit exponential behavior, but at least there is nothing /// unbounded. std::vector<Function*> CallStack; + /// SimpleConstants - These are constants we have checked and know to be + /// simple enough to live in a static initializer of a global. + SmallPtrSet<Constant*, 8> SimpleConstants; + // Call the function. Constant *RetValDummy; bool EvalSuccess = EvaluateFunction(F, RetValDummy, SmallVector<Constant*, 0>(), CallStack, - MutatedMemory, AllocaTmps); + MutatedMemory, AllocaTmps, + SimpleConstants, TD); + if (EvalSuccess) { // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" @@ -2428,13 +2579,13 @@ static bool EvaluateStaticConstructor(Function *F) { E = MutatedMemory.end(); I != E; ++I) CommitValueTo(I->second, I->first); } - + // At this point, we are done interpreting. If we created any 'alloca' // temporaries, release them now. while (!AllocaTmps.empty()) { GlobalVariable *Tmp = AllocaTmps.back(); AllocaTmps.pop_back(); - + // If there are still users of the alloca, the program is doing something // silly, e.g. storing the address of the alloca somewhere and using it // later. Since this is undefined, we'll just make it be null. @@ -2442,7 +2593,7 @@ static bool EvaluateStaticConstructor(Function *F) { Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); delete Tmp; } - + return EvalSuccess; } @@ -2454,7 +2605,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { std::vector<Function*> Ctors = ParseGlobalCtors(GCL); bool MadeChange = false; if (Ctors.empty()) return false; - + + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); // Loop over global ctors, optimizing them when we can. for (unsigned i = 0; i != Ctors.size(); ++i) { Function *F = Ctors[i]; @@ -2467,12 +2619,12 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { } break; } - + // We cannot simplify external ctor functions. if (F->empty()) continue; - + // If we can evaluate the ctor at compile time, do. 
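//
// Editor's sketch (not part of this commit): for a constructor like
//
//   int g;
//   __attribute__((constructor)) static void init() { g = 6 * 7; }
//
// EvaluateFunction records g <- 42 in MutatedMemory; on success the call
// below commits the store into @g's initializer and init() is erased from
// the ctor list. TargetData is now threaded through so that intermediate
// constant expressions can be folded via ConstantFoldConstantExpression.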
- if (EvaluateStaticConstructor(F)) { + if (EvaluateStaticConstructor(F, TD)) { Ctors.erase(Ctors.begin()+i); MadeChange = true; --i; @@ -2480,9 +2632,9 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { continue; } } - + if (!MadeChange) return false; - + GCL = InstallGlobalCtors(GCL, Ctors); return true; } @@ -2546,21 +2698,21 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - + // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); bool LocalChange = true; while (LocalChange) { LocalChange = false; - + // Delete functions that are trivially dead, ccc -> fastcc LocalChange |= OptimizeFunctions(M); - + // Optimize global_ctors list. if (GlobalCtors) LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); - + // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M); @@ -2568,9 +2720,9 @@ bool GlobalOpt::runOnModule(Module &M) { LocalChange |= OptimizeGlobalAliases(M); Changed |= LocalChange; } - + // TODO: Move all global ctors functions to the end of the module for code // layout. - + return Changed; } diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp index 1b3cf78..c7c2939 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -35,7 +35,9 @@ namespace { /// struct IPCP : public ModulePass { static char ID; // Pass identification, replacement for typeid - IPCP() : ModulePass(ID) {} + IPCP() : ModulePass(ID) { + initializeIPCPPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); private: @@ -46,7 +48,7 @@ namespace { char IPCP::ID = 0; INITIALIZE_PASS(IPCP, "ipconstprop", - "Interprocedural constant propagation", false, false); + "Interprocedural constant propagation", false, false) ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp index 340b70e..fbe90ce 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -7,17 +7,51 @@ // //===----------------------------------------------------------------------===// // -// This file implements the C bindings for libLLVMIPO.a, which implements -// several transformations over the LLVM intermediate representation. +// This file implements the common infrastructure (including C bindings) for +// libLLVMIPO.a, which implements several transformations over the LLVM +// intermediate representation. 
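//
// Editor's sketch (not part of this commit; names as defined below): clients
// that construct passes directly must now prime the registry first, roughly
//
//   PassRegistry &Registry = *PassRegistry::getPassRegistry();
//   initializeIPO(Registry);      // registers every IPO pass in one shot
//
// with LLVMInitializeIPO() as the C API equivalent, so that PassInfo entries
// and analysis dependencies exist before any pass object is created.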
//
//===----------------------------------------------------------------------===//

#include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"

using namespace llvm;

+void llvm::initializeIPO(PassRegistry &Registry) {
+  initializeArgPromotionPass(Registry);
+  initializeConstantMergePass(Registry);
+  initializeDAEPass(Registry);
+  initializeDAHPass(Registry);
+  initializeDTEPass(Registry);
+  initializeFunctionAttrsPass(Registry);
+  initializeGlobalDCEPass(Registry);
+  initializeGlobalOptPass(Registry);
+  initializeIPCPPass(Registry);
+  initializeAlwaysInlinerPass(Registry);
+  initializeSimpleInlinerPass(Registry);
+  initializeInternalizePassPass(Registry);
+  initializeLoopExtractorPass(Registry);
+  initializeBlockExtractorPassPass(Registry);
+  initializeSingleLoopExtractorPass(Registry);
+  initializeLowerSetJmpPass(Registry);
+  initializeMergeFunctionsPass(Registry);
+  initializePartialInlinerPass(Registry);
+  initializePruneEHPass(Registry);
+  initializeStripDeadPrototypesPassPass(Registry);
+  initializeStripSymbolsPass(Registry);
+  initializeStripDebugDeclarePass(Registry);
+  initializeStripDeadDebugInfoPass(Registry);
+  initializeStripNonDebugSymbolsPass(Registry);
+  initializeSRETPromotionPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+  initializeIPO(*unwrap(R));
+}
+
void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
  unwrap(PM)->add(createArgumentPromotionPass());
}

diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index ecc60ad..ce795b7 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -36,7 +36,9 @@ namespace {
    InlineCostAnalyzer CA;
  public:
    // Use extremely low threshold.
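//
// Editor's note (not part of this commit): with a threshold of -2000000000
// no call site can ever win on its numeric cost, because shouldInline()
// demands Cost < Threshold * FudgeFactor. The only call sites this pass
// inlines are therefore the ones getInlineCost() reports as isAlways(),
// i.e. calls to functions marked always_inline.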
- AlwaysInliner() : Inliner(ID, -2000000000) {} + AlwaysInliner() : Inliner(ID, -2000000000) { + initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); + } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); @@ -61,8 +63,11 @@ namespace { } char AlwaysInliner::ID = 0; -INITIALIZE_PASS(AlwaysInliner, "always-inline", - "Inliner for always_inline functions", false, false); +INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", + "Inliner for always_inline functions", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(AlwaysInliner, "always-inline", + "Inliner for always_inline functions", false, false) Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); } diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp index 9c6637d..0c5b3be 100644 --- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -33,8 +33,12 @@ namespace { SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: - SimpleInliner() : Inliner(ID) {} - SimpleInliner(int Threshold) : Inliner(ID, Threshold) {} + SimpleInliner() : Inliner(ID) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + SimpleInliner(int Threshold) : Inliner(ID, Threshold) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); @@ -56,8 +60,11 @@ namespace { } char SimpleInliner::ID = 0; -INITIALIZE_PASS(SimpleInliner, "inline", - "Function Integration/Inlining", false, false); +INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp index 4983e8e..37eafd7 100644 --- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -52,7 +52,8 @@ Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} Inliner::Inliner(char &ID, int Threshold) - : CallGraphSCCPass(ID), InlineThreshold(Threshold) {} + : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? + InlineLimit : Threshold) {} /// getAnalysisUsage - For this class, we declare that we require and preserve /// the call graph. If the derived class implements this method, it should @@ -74,7 +75,8 @@ InlinedArrayAllocasTy; /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas) { + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -91,7 +93,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, !Caller->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtect); - // Look at all of the allocas that we inlined through this call site. 
If we // have already inlined other allocas through other calls into this function, // then we know that they have disjoint lifetimes and that we can merge them. @@ -115,6 +116,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // SmallPtrSet<AllocaInst*, 16> UsedAllocas; + // When processing our SCC, check to see if CS was inlined from some other + // call site. For example, if we're processing "A" in this code: + // A() { B() } + // B() { x = alloca ... C() } + // C() { y = alloca ... } + // Assume that C was not inlined into B initially, and so we're processing A + // and decide to inline B into A. Doing this makes an alloca available for + // reuse and makes a callsite (C) available for inlining. When we process + // the C call site we don't want to do any alloca merging between X and Y + // because their scopes are not disjoint. We could make this smarter by + // keeping track of the inline history for each alloca in the + // InlinedArrayAllocas but this isn't likely to be a significant win. + if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. + return true; + // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); @@ -152,19 +168,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare // success! - DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI); + DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " + << *AvailableAlloca << '\n'); AI->replaceAllUsesWith(AvailableAlloca); AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; + IFI.StaticAllocas[AllocaNo] = 0; break; } // If we already nuked the alloca, we're done with it. if (MergedAwayAlloca) continue; - + // If we were unable to merge away the alloca either because there are no // allocas of the right type available or because we reused them all // already, remember that this alloca came from an inlined function and mark @@ -234,20 +252,25 @@ bool Inliner::shouldInline(CallSite CS) { if (Caller->hasLocalLinkage()) { int TotalSecondaryCost = 0; bool outerCallsFound = false; - bool allOuterCallsWillBeInlined = true; - bool someOuterCallWouldNotBeInlined = false; + // This bool tracks what happens if we do NOT inline C into B. + bool callerWillBeRemoved = true; + // This bool tracks what happens if we DO inline C into B. + bool inliningPreventsSomeOuterInline = false; for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); I != E; ++I) { CallSite CS2(*I); // If this isn't a call to Caller (it could be some other sort - // of reference) skip it. - if (!CS2 || CS2.getCalledFunction() != Caller) + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2.getCalledFunction() != Caller) { + callerWillBeRemoved = false; continue; + } InlineCost IC2 = getInlineCost(CS2); if (IC2.isNever()) - allOuterCallsWillBeInlined = false; + callerWillBeRemoved = false; if (IC2.isAlways() || IC2.isNever()) continue; @@ -257,14 +280,14 @@ bool Inliner::shouldInline(CallSite CS) { float FudgeFactor2 = getInlineFudgeFactor(CS2); if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) - allOuterCallsWillBeInlined = false; + callerWillBeRemoved = false; // See if we have this case. We subtract off the penalty // for the call instruction, which we would be deleting. 
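//
// Editor's illustration with made-up numbers (not part of this commit):
// suppose inlining CS costs Cost = 100, and an outer call site CS2 of Caller
// has Cost2 = 80 against an effective threshold of 120. Today CS2 inlines
// (80 < 120), but after Caller absorbs CS, Cost2 + Cost - (CallPenalty + 1)
// exceeds 120, so inlining CS would block CS2. Cost2 is then charged as a
// secondary cost, and if the accumulated secondary costs exceed Cost we
// leave Caller intact instead.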
if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= (int)(CurrentThreshold2 * FudgeFactor2)) { - someOuterCallWouldNotBeInlined = true; + inliningPreventsSomeOuterInline = true; TotalSecondaryCost += Cost2; } } @@ -272,10 +295,10 @@ bool Inliner::shouldInline(CallSite CS) { // one is set very low by getInlineCost, in anticipation that Caller will // be removed entirely. We did not account for this above unless there // is only one caller of Caller. - if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end()) + if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; - if (outerCallsFound && someOuterCallWouldNotBeInlined && + if (outerCallsFound && inliningPreventsSomeOuterInline && TotalSecondaryCost < Cost) { DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << Cost << @@ -401,7 +424,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee - // itself. If so, we'd be recursively inlinling the same function, + // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. int InlineHistoryID = CallSites[CSi].second; @@ -416,7 +439,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { continue; // Attempt to inline the function. - if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas)) + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, + InlineHistoryID)) continue; ++NumInlined; diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp index a1d919f..9b9ebad 100644 --- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp @@ -64,10 +64,11 @@ namespace { char InternalizePass::ID = 0; INITIALIZE_PASS(InternalizePass, "internalize", - "Internalize Global Symbols", false, false); + "Internalize Global Symbols", false, false) InternalizePass::InternalizePass(bool AllButMain) : ModulePass(ID), AllButMain(AllButMain){ + initializeInternalizePassPass(*PassRegistry::getPassRegistry()); if (!APIFile.empty()) // If a filename is specified, use it. LoadFile(APIFile.c_str()); if (!APIList.empty()) // If a list is specified, use it as well. 
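//
// Editor's note (not part of this commit): this is the pattern repeated
// throughout the patch; every pass constructor now self-registers, e.g.
//
//   initializeInternalizePassPass(*PassRegistry::getPassRegistry());
//
// so that a pass created with plain 'new', bypassing the registry, still
// gets its PassInfo and declared dependencies registered before first use.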
@@ -76,6 +77,7 @@ InternalizePass::InternalizePass(bool AllButMain) InternalizePass::InternalizePass(const std::vector<const char *>&exportList) : ModulePass(ID), AllButMain(false){ + initializeInternalizePassPass(*PassRegistry::getPassRegistry()); for(std::vector<const char *>::const_iterator itr = exportList.begin(); itr != exportList.end(); itr++) { ExternalNames.insert(*itr); diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp index f88dff6..848944d 100644 --- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -37,7 +37,9 @@ namespace { unsigned NumLoops; explicit LoopExtractor(unsigned numLoops = ~0) - : LoopPass(ID), NumLoops(numLoops) {} + : LoopPass(ID), NumLoops(numLoops) { + initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -50,8 +52,13 @@ namespace { } char LoopExtractor::ID = 0; -INITIALIZE_PASS(LoopExtractor, "loop-extract", - "Extract loops into new functions", false, false); +INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) namespace { /// SingleLoopExtractor - For bugpoint. @@ -63,7 +70,7 @@ namespace { char SingleLoopExtractor::ID = 0; INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", - "Extract at most one loop into a new function", false, false); + "Extract at most one loop into a new function", false, false) // createLoopExtractorPass - This pass extracts all natural loops from the // program into a function if it can. @@ -159,7 +166,7 @@ namespace { char BlockExtractorPass::ID = 0; INITIALIZE_PASS(BlockExtractorPass, "extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)", - false, false); + false, false) // createBlockExtractorPass - This pass extracts all blocks (except those // specified in the argument list) from the functions in the module. diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp index 6c715de..b545f0b 100644 --- a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp @@ -109,7 +109,9 @@ namespace { bool IsTransformableFunction(StringRef Name); public: static char ID; // Pass identification, replacement for typeid - LowerSetJmp() : ModulePass(ID) {} + LowerSetJmp() : ModulePass(ID) { + initializeLowerSetJmpPass(*PassRegistry::getPassRegistry()); + } void visitCallInst(CallInst& CI); void visitInvokeInst(InvokeInst& II); @@ -122,7 +124,7 @@ namespace { } // end anonymous namespace char LowerSetJmp::ID = 0; -INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false); +INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false) // run - Run the transformation on the program. We grab the function // prototypes for longjmp and setjmp. 
If they are used in the program, diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 5d838f9..cccffca 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -67,42 +67,87 @@ using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); +STATISTIC(NumThunksWritten, "Number of thunks generated"); +STATISTIC(NumAliasesWritten, "Number of aliases generated"); +STATISTIC(NumDoubleWeak, "Number of new functions created"); + +/// Creates a hash-code for the function which is the same for any two +/// functions that will compare equal, without looking at the instructions +/// inside the function. +static unsigned profileFunction(const Function *F) { + const FunctionType *FTy = F->getFunctionType(); -namespace { - /// MergeFunctions finds functions which will generate identical machine code, - /// by considering all pointer types to be equivalent. Once identified, - /// MergeFunctions will fold them by replacing a call to one to a call to a - /// bitcast of the other. - /// - class MergeFunctions : public ModulePass { - public: - static char ID; - MergeFunctions() : ModulePass(ID) {} - - bool runOnModule(Module &M); - - private: - /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G - /// may be deleted, or may be converted into a thunk. In either case, it - /// should never be visited again. - void MergeTwoFunctions(Function *F, Function *G) const; - - /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also - /// replace direct uses of G with bitcast(F). - void WriteThunk(Function *F, Function *G) const; - - TargetData *TD; - }; + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + ID.AddBoolean(F->hasGC()); + ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(FTy->getReturnType()->getTypeID()); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(FTy->getParamType(i)->getTypeID()); + return ID.ComputeHash(); } -char MergeFunctions::ID = 0; -INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false); +namespace { + +/// ComparableFunction - A struct that pairs together functions with a +/// TargetData so that we can keep them together as elements in the DenseSet. +class ComparableFunction { +public: + static const ComparableFunction EmptyKey; + static const ComparableFunction TombstoneKey; + static TargetData * const LookupOnly; + + ComparableFunction(Function *Func, TargetData *TD) + : Func(Func), Hash(profileFunction(Func)), TD(TD) {} + + Function *getFunc() const { return Func; } + unsigned getHash() const { return Hash; } + TargetData *getTD() const { return TD; } + + // Drops AssertingVH reference to the function. Outside of debug mode, this + // does nothing. 
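//
// Editor's note (not part of this commit): AssertingVH<Function> aborts in
// +Asserts builds if the Function is destroyed while a ComparableFunction
// still references it, which catches stale DenseSet entries; release()
// therefore drops the handle before a function is erased. The Empty and
// Tombstone sentinels go through the private hash-only constructor, so their
// Func is NULL and they can never name a real function.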
+ void release() { + assert(Func && + "Attempted to release function twice, or release empty/tombstone!"); + Func = NULL; + } + +private: + explicit ComparableFunction(unsigned Hash) + : Func(NULL), Hash(Hash), TD(NULL) {} + + AssertingVH<Function> Func; + unsigned Hash; + TargetData *TD; +}; + +const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); +const ComparableFunction ComparableFunction::TombstoneKey = + ComparableFunction(1); +TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1); -ModulePass *llvm::createMergeFunctionsPass() { - return new MergeFunctions(); +} + +namespace llvm { + template <> + struct DenseMapInfo<ComparableFunction> { + static ComparableFunction getEmptyKey() { + return ComparableFunction::EmptyKey; + } + static ComparableFunction getTombstoneKey() { + return ComparableFunction::TombstoneKey; + } + static unsigned getHashValue(const ComparableFunction &CF) { + return CF.getHash(); + } + static bool isEqual(const ComparableFunction &LHS, + const ComparableFunction &RHS); + }; } namespace { + /// FunctionComparator - Compares two functions to determine whether or not /// they will generate machine code with the same behaviour. TargetData is /// used if available. The comparator always fails conservatively (erring on the @@ -111,34 +156,34 @@ class FunctionComparator { public: FunctionComparator(const TargetData *TD, const Function *F1, const Function *F2) - : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {} + : F1(F1), F2(F2), TD(TD) {} - /// Compare - test whether the two functions have equivalent behaviour. - bool Compare(); + /// Test whether the two functions have equivalent behaviour. + bool compare(); private: - /// Compare - test whether two basic blocks have equivalent behaviour. - bool Compare(const BasicBlock *BB1, const BasicBlock *BB2); + /// Test whether two basic blocks have equivalent behaviour. + bool compare(const BasicBlock *BB1, const BasicBlock *BB2); - /// Enumerate - Assign or look up previously assigned numbers for the two - /// values, and return whether the numbers are equal. Numbers are assigned in - /// the order visited. - bool Enumerate(const Value *V1, const Value *V2); + /// Assign or look up previously assigned numbers for the two values, and + /// return whether the numbers are equal. Numbers are assigned in the order + /// visited. + bool enumerate(const Value *V1, const Value *V2); - /// isEquivalentOperation - Compare two Instructions for equivalence, similar - /// to Instruction::isSameOperationAs but with modifications to the type + /// Compare two Instructions for equivalence, similar to + /// Instruction::isSameOperationAs but with modifications to the type /// comparison. bool isEquivalentOperation(const Instruction *I1, const Instruction *I2) const; - /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic. + /// Compare two GEPs for equivalent pointer arithmetic. bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); bool isEquivalentGEP(const GetElementPtrInst *GEP1, const GetElementPtrInst *GEP2) { return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2)); } - /// isEquivalentType - Compare two Types, treating all pointer types as equal. + /// Compare two Types, treating all pointer types as equal. bool isEquivalentType(const Type *Ty1, const Type *Ty2) const; // The two functions undergoing comparison. 
@@ -146,20 +191,26 @@ private: const TargetData *TD; - typedef DenseMap<const Value *, unsigned long> IDMap; - IDMap Map1, Map2; - unsigned long IDMap1Count, IDMap2Count; + DenseMap<const Value *, const Value *> id_map; + DenseSet<const Value *> seen_values; }; + } -/// isEquivalentType - any two pointers in the same address space are -/// equivalent. Otherwise, standard type equivalence rules apply. +// Any two pointers in the same address space are equivalent, intptr_t and +// pointers are equivalent. Otherwise, standard type equivalence rules apply. bool FunctionComparator::isEquivalentType(const Type *Ty1, const Type *Ty2) const { if (Ty1 == Ty2) return true; - if (Ty1->getTypeID() != Ty2->getTypeID()) + if (Ty1->getTypeID() != Ty2->getTypeID()) { + if (TD) { + LLVMContext &Ctx = Ty1->getContext(); + if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true; + if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true; + } return false; + } switch(Ty1->getTypeID()) { default: @@ -167,6 +218,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, // Fall through in Release mode. case Type::IntegerTyID: case Type::OpaqueTyID: + case Type::VectorTyID: // Ty1 == Ty2 would have returned true earlier. return false; @@ -225,21 +277,18 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, return ATy1->getNumElements() == ATy2->getNumElements() && isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); } - - case Type::VectorTyID: { - const VectorType *VTy1 = cast<VectorType>(Ty1); - const VectorType *VTy2 = cast<VectorType>(Ty2); - return VTy1->getNumElements() == VTy2->getNumElements() && - isEquivalentType(VTy1->getElementType(), VTy2->getElementType()); - } } } -/// isEquivalentOperation - determine whether the two operations are the same -/// except that pointer-to-A and pointer-to-B are equivalent. This should be -/// kept in sync with Instruction::isSameOperationAs. +// Determine whether the two operations are the same except that pointer-to-A +// and pointer-to-B are equivalent. This should be kept in sync with +// Instruction::isSameOperationAs. bool FunctionComparator::isEquivalentOperation(const Instruction *I1, const Instruction *I2) const { + // Differences from Instruction::isSameOperationAs: + // * replace type comparison with calls to isEquivalentType. 
+ // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top + // * because of the above, we don't test for the tail bit on calls later on if (I1->getOpcode() != I2->getOpcode() || I1->getNumOperands() != I2->getNumOperands() || !isEquivalentType(I1->getType(), I2->getType()) || @@ -263,14 +312,11 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(I1)) - return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() && - CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && - CI->getAttributes().getRawPointer() == - cast<CallInst>(I2)->getAttributes().getRawPointer(); + return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && + CI->getAttributes() == cast<CallInst>(I2)->getAttributes(); if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && - CI->getAttributes().getRawPointer() == - cast<InvokeInst>(I2)->getAttributes().getRawPointer(); + CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes(); if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) return false; @@ -291,8 +337,7 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, return true; } -/// isEquivalentGEP - determine whether two GEP operations perform the same -/// underlying arithmetic. +// Determine whether two GEP operations perform the same underlying arithmetic. bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) { // When we have target data, we can reduce the GEP down to the value in bytes @@ -315,17 +360,17 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, return false; for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { - if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) + if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) return false; } return true; } -/// Enumerate - Compare two values used by the two functions under pair-wise -/// comparison. If this is the first time the values are seen, they're added to -/// the mapping so that we will detect mismatches on next use. -bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) { +// Compare two values used by the two functions under pair-wise comparison. If +// this is the first time the values are seen, they're added to the mapping so +// that we will detect mismatches on next use. +bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { // Check for function @f1 referring to itself and function @f2 referring to // itself, or referring to each other, or both referring to either of them. // They're all equivalent if the two functions are otherwise equivalent. @@ -334,35 +379,44 @@ bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) { if (V1 == F2 && V2 == F1) return true; - // TODO: constant expressions with GEP or references to F1 or F2. 
- if (isa<Constant>(V1)) - return V1 == V2; - - if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) { - const InlineAsm *IA1 = cast<InlineAsm>(V1); - const InlineAsm *IA2 = cast<InlineAsm>(V2); - return IA1->getAsmString() == IA2->getAsmString() && - IA1->getConstraintString() == IA2->getConstraintString(); + if (const Constant *C1 = dyn_cast<Constant>(V1)) { + if (V1 == V2) return true; + const Constant *C2 = dyn_cast<Constant>(V2); + if (!C2) return false; + // TODO: constant expressions with GEP or references to F1 or F2. + if (C1->isNullValue() && C2->isNullValue() && + isEquivalentType(C1->getType(), C2->getType())) + return true; + // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 + // then they must have equal bit patterns. + return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && + C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType()); } - unsigned long &ID1 = Map1[V1]; - if (!ID1) - ID1 = ++IDMap1Count; + if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2)) + return V1 == V2; - unsigned long &ID2 = Map2[V2]; - if (!ID2) - ID2 = ++IDMap2Count; + // Check that V1 maps to V2. If we find a value that V1 maps to then we simply + // check whether it's equal to V2. When there is no mapping then we need to + // ensure that V2 isn't already equivalent to something else. For this + // purpose, we track the V2 values in a set. - return ID1 == ID2; + const Value *&map_elem = id_map[V1]; + if (map_elem) + return map_elem == V2; + if (!seen_values.insert(V2).second) + return false; + map_elem = V2; + return true; } -/// Compare - test whether two basic blocks have equivalent behaviour. -bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { +// Test whether two basic blocks have equivalent behaviour. +bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) { BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); do { - if (!Enumerate(F1I, F2I)) + if (!enumerate(F1I, F2I)) return false; if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) { @@ -370,7 +424,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { if (!GEP2) return false; - if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) + if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) return false; if (!isEquivalentGEP(GEP1, GEP2)) @@ -384,7 +438,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { Value *OpF1 = F1I->getOperand(i); Value *OpF2 = F2I->getOperand(i); - if (!Enumerate(OpF1, OpF2)) + if (!enumerate(OpF1, OpF2)) return false; if (OpF1->getValueID() != OpF2->getValueID() || @@ -399,8 +453,8 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { return F1I == F1E && F2I == F2E; } -/// Compare - test whether the two functions have equivalent behaviour. -bool FunctionComparator::Compare() { +// Test whether the two functions have equivalent behaviour. +bool FunctionComparator::compare() { // We need to recheck everything, but check the things that weren't included // in the hash first. @@ -431,14 +485,14 @@ bool FunctionComparator::Compare() { return false; assert(F1->arg_size() == F2->arg_size() && - "Identical functions have a different number of args."); + "Identically typed functions have different numbers of args!"); // Visit the arguments so that they get enumerated in the order they're // passed in. 
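  // Note that enumerate() keeps this correspondence one-to-one: comparing
  // f1(a, b) against f2(x, y), the pair (a, x) records a -> x in id_map and
  // marks x as seen; a later pair (a, y) fails because a already maps to x,
  // and (b, x) fails because x is already the image of a different value.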
   for (Function::const_arg_iterator f1i = F1->arg_begin(),
          f2i = F2->arg_begin(), f1e = F1->arg_end();
        f1i != f1e; ++f1i, ++f2i) {
-    if (!Enumerate(f1i, f2i))
-      llvm_unreachable("Arguments repeat");
+    if (!enumerate(f1i, f2i))
+      llvm_unreachable("Arguments repeat!");
   }
 
   // We do a CFG-ordered walk since the actual ordering of the blocks in the
@@ -456,7 +510,7 @@ bool FunctionComparator::Compare() {
     const BasicBlock *F1BB = F1BBs.pop_back_val();
     const BasicBlock *F2BB = F2BBs.pop_back_val();
 
-    if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+    if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
       return false;
 
     const TerminatorInst *F1TI = F1BB->getTerminator();
@@ -474,23 +528,190 @@ bool FunctionComparator::Compare() {
   return true;
 }
 
-/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
-/// direct uses of G with bitcast(F).
-void MergeFunctions::WriteThunk(Function *F, Function *G) const {
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one with a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+  static char ID;
+  MergeFunctions()
+    : ModulePass(ID), HasGlobalAliases(false) {
+    initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M);
+
+private:
+  typedef DenseSet<ComparableFunction> FnSetType;
+
+  /// A work queue of functions that may have been modified and should be
+  /// analyzed again.
+  std::vector<WeakVH> Deferred;
+
+  /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+  /// equal to one that's already present.
+  bool insert(ComparableFunction &NewF);
+
+  /// Remove a Function from the FnSet and queue it up for a second sweep of
+  /// analysis.
+  void remove(Function *F);
+
+  /// Find the functions that use this Value and remove them from FnSet and
+  /// queue the functions.
+  void removeUsers(Value *V);
+
+  /// Replace all direct calls of Old with calls of New. Will bitcast New if
+  /// necessary to make types match.
+  void replaceDirectCallers(Function *Old, Function *New);
+
+  /// Merge two equivalent functions. Upon completion, G may be deleted, or may
+  /// be converted into a thunk. In either case, it should never be visited
+  /// again.
+  void mergeTwoFunctions(Function *F, Function *G);
+
+  /// Replace G with a thunk or an alias to F. Deletes G.
+  void writeThunkOrAlias(Function *F, Function *G);
+
+  /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+  /// of G with bitcast(F). Deletes G.
+  void writeThunk(Function *F, Function *G);
+
+  /// Replace G with an alias to F. Deletes G.
+  void writeAlias(Function *F, Function *G);
+
+  /// The set of all distinct functions. Use the insert() and remove() methods
+  /// to modify it.
+  FnSetType FnSet;
+
+  /// TargetData for more accurate GEP comparisons. May be NULL.
+  TargetData *TD;
+
+  /// Whether or not the target supports global aliases.
+  bool HasGlobalAliases;
+};
+
+} // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+  return new MergeFunctions();
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+  bool Changed = false;
+  TD = getAnalysisIfAvailable<TargetData>();
+
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+      Deferred.push_back(WeakVH(I));
+  }
+  FnSet.resize(Deferred.size());
+
+  do {
+    std::vector<WeakVH> Worklist;
+    Deferred.swap(Worklist);
+
+    DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+    DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+    // Insert only strong functions and merge them. Strong function merging
+    // always deletes one of them.
+    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+           E = Worklist.end(); I != E; ++I) {
+      if (!*I) continue;
+      Function *F = cast<Function>(*I);
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          !F->mayBeOverridden()) {
+        ComparableFunction CF = ComparableFunction(F, TD);
+        Changed |= insert(CF);
+      }
+    }
+
+    // Insert only weak functions and merge them. By doing these second, we
+    // create thunks to the strong function when possible. When two weak
+    // functions are identical, we create a new strong function with two weak
+    // thunks to it which are identical but not mergeable.
+    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+           E = Worklist.end(); I != E; ++I) {
+      if (!*I) continue;
+      Function *F = cast<Function>(*I);
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          F->mayBeOverridden()) {
+        ComparableFunction CF = ComparableFunction(F, TD);
+        Changed |= insert(CF);
+      }
+    }
+    DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+  } while (!Deferred.empty());
+
+  FnSet.clear();
+
+  return Changed;
+}
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+                                               const ComparableFunction &RHS) {
+  if (LHS.getFunc() == RHS.getFunc() &&
+      LHS.getHash() == RHS.getHash())
+    return true;
+  if (!LHS.getFunc() || !RHS.getFunc())
+    return false;
+
+  // One of these is a special "underlying pointer comparison only" object.
+  if (LHS.getTD() == ComparableFunction::LookupOnly ||
+      RHS.getTD() == ComparableFunction::LookupOnly)
+    return false;
+
+  assert(LHS.getTD() == RHS.getTD() &&
+         "Comparing functions for different targets");
+
+  return FunctionComparator(LHS.getTD(), LHS.getFunc(),
+                            RHS.getFunc()).compare();
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+  Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+  for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+       UI != UE;) {
+    Value::use_iterator TheIter = UI;
+    ++UI;
+    CallSite CS(*TheIter);
+    if (CS && CS.isCallee(TheIter)) {
+      remove(CS.getInstruction()->getParent()->getParent());
+      TheIter.getUse().set(BitcastNew);
+    }
+  }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+  if (HasGlobalAliases && G->hasUnnamedAddr()) {
+    if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+        G->hasWeakLinkage()) {
+      writeAlias(F, G);
+      return;
+    }
+  }
+
+  writeThunk(F, G);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
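+// For example, the replacement for G is a freshly created function that takes
+// over G's name and attributes; its single basic block forwards G's incoming
+// parameters into a tail call of bitcast(F) and returns the result, after
+// which the old G has its remaining uses replaced and is erased.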
+void MergeFunctions::writeThunk(Function *F, Function *G) { if (!G->mayBeOverridden()) { // Redirect direct callers of G to F. - Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); - for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); - UI != UE;) { - Value::use_iterator TheIter = UI; - ++UI; - CallSite CS(*TheIter); - if (CS && CS.isCallee(TheIter)) - TheIter.getUse().set(BitcastF); - } + replaceDirectCallers(G, F); } - // If G was internal then we may have replaced all uses if G with F. If so, + // If G was internal then we may have replaced all uses of G with F. If so, // stop here and delete G. There's no need for a thunk. if (G->hasLocalLinkage() && G->use_empty()) { G->eraseFromParent(); @@ -522,131 +743,126 @@ void MergeFunctions::WriteThunk(Function *F, Function *G) const { NewG->copyAttributesFrom(G); NewG->takeName(G); + removeUsers(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); + + DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n'); + ++NumThunksWritten; } -/// MergeTwoFunctions - Merge two equivalent functions. Upon completion, -/// Function G is deleted. -void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const { - if (F->isWeakForLinker()) { - assert(G->isWeakForLinker()); +// Replace G with an alias to F and delete G. +void MergeFunctions::writeAlias(Function *F, Function *G) { + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", + BitcastF, G->getParent()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + removeUsers(G); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); + + DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n'); + ++NumAliasesWritten; +} + +// Merge two equivalent functions. Upon completion, Function G is deleted. +void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { + if (F->mayBeOverridden()) { + assert(G->mayBeOverridden()); + + if (HasGlobalAliases) { + // Make them both thunks to the same internal function. + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + removeUsers(F); + F->replaceAllUsesWith(H); - // Make them both thunks to the same internal function. - Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", - F->getParent()); - H->copyAttributesFrom(F); - H->takeName(F); - F->replaceAllUsesWith(H); + unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); - unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); + writeAlias(F, G); + writeAlias(F, H); - WriteThunk(F, G); - WriteThunk(F, H); + F->setAlignment(MaxAlignment); + F->setLinkage(GlobalValue::PrivateLinkage); + } else { + // We can't merge them. Instead, pick one and update all direct callers + // to call it and hope that we improve the instruction cache hit rate. 
+ replaceDirectCallers(G, F); + } - F->setAlignment(MaxAlignment); - F->setLinkage(GlobalValue::InternalLinkage); + ++NumDoubleWeak; } else { - WriteThunk(F, G); + writeThunkOrAlias(F, G); } ++NumFunctionsMerged; } -static unsigned ProfileFunction(const Function *F) { - const FunctionType *FTy = F->getFunctionType(); - - FoldingSetNodeID ID; - ID.AddInteger(F->size()); - ID.AddInteger(F->getCallingConv()); - ID.AddBoolean(F->hasGC()); - ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(FTy->getReturnType()->getTypeID()); - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(FTy->getParamType(i)->getTypeID()); - return ID.ComputeHash(); -} - -class ComparableFunction { -public: - ComparableFunction(Function *Func, TargetData *TD) - : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {} +// Insert a ComparableFunction into the FnSet, or merge it away if equal to one +// that was already inserted. +bool MergeFunctions::insert(ComparableFunction &NewF) { + std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); + if (Result.second) { + DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); + return false; + } - AssertingVH<Function> const Func; - const unsigned Hash; - TargetData * const TD; -}; + const ComparableFunction &OldF = *Result.first; -struct MergeFunctionsEqualityInfo { - static ComparableFunction *getEmptyKey() { - return reinterpret_cast<ComparableFunction*>(0); - } - static ComparableFunction *getTombstoneKey() { - return reinterpret_cast<ComparableFunction*>(-1); - } - static unsigned getHashValue(const ComparableFunction *CF) { - return CF->Hash; - } - static bool isEqual(const ComparableFunction *LHS, - const ComparableFunction *RHS) { - if (LHS == RHS) - return true; - if (LHS == getEmptyKey() || LHS == getTombstoneKey() || - RHS == getEmptyKey() || RHS == getTombstoneKey()) - return false; - assert(LHS->TD == RHS->TD && "Comparing functions for different targets"); - return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare(); - } -}; + // Never thunk a strong function to a weak function. + assert(!OldF.getFunc()->mayBeOverridden() || + NewF.getFunc()->mayBeOverridden()); -bool MergeFunctions::runOnModule(Module &M) { - typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType; + DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " + << NewF.getFunc()->getName() << '\n'); - bool Changed = false; - TD = getAnalysisIfAvailable<TargetData>(); + Function *DeleteF = NewF.getFunc(); + NewF.release(); + mergeTwoFunctions(OldF.getFunc(), DeleteF); + return true; +} - std::vector<Function *> Funcs; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) - Funcs.push_back(F); +// Remove a function from FnSet. If it was already in FnSet, add it to Deferred +// so that we'll look at it in the next round. +void MergeFunctions::remove(Function *F) { + // We need to make sure we remove F, not a function "equal" to F per the + // function equality comparator. + // + // The special "lookup only" ComparableFunction bypasses the expensive + // function comparison in favour of a pointer comparison on the underlying + // Function*'s. 
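+  // With LookupOnly as its TargetData, isEqual() returns false for any pair
+  // of distinct Function pointers before a FunctionComparator is ever
+  // constructed, so the erase() below can only match the entry holding F
+  // itself.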
+  ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
+  if (FnSet.erase(CF)) {
+    DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+    Deferred.push_back(F);
+  }
+}
 
-  bool LocalChanged;
-  do {
-    LocalChanged = false;
-
-    FnSetType FnSet;
-    for (unsigned i = 0, e = Funcs.size(); i != e;) {
-      Function *F = Funcs[i];
-      ComparableFunction *NewF = new ComparableFunction(F, TD);
-      std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
-      if (!Result.second) {
-        ComparableFunction *&OldF = *Result.first;
-        assert(OldF && "Expected a hash collision");
-
-        // NewF will be deleted in favour of OldF unless NewF is strong and
-        // OldF is weak in which case swap them to keep the strong definition.
-
-        if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker())
-          std::swap(OldF, NewF);
-
-        DEBUG(dbgs() << "  " << OldF->Func->getName() << " == "
-                     << NewF->Func->getName() << '\n');
-
-        Funcs.erase(Funcs.begin() + i);
-        --e;
-
-        Function *DeleteF = NewF->Func;
-        delete NewF;
-        MergeTwoFunctions(OldF->Func, DeleteF);
-        LocalChanged = true;
-        Changed = true;
-      } else {
-        ++i;
+// For each instruction that uses the value, remove() the function that
+// contains the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+  std::vector<Value *> Worklist;
+  Worklist.push_back(V);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.back();
+    Worklist.pop_back();
+
+    for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+         UI != UE; ++UI) {
+      Use &U = UI.getUse();
+      if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+        remove(I->getParent()->getParent());
+      } else if (isa<GlobalValue>(U.getUser())) {
+        // do nothing
+      } else if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+        for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end();
+             CUI != CUE; ++CUI)
+          Worklist.push_back(*CUI);
+      }
     }
-    DeleteContainerPointers(FnSet);
-  } while (LocalChanged);
-
-  return Changed;
+  }
 }
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 432f7c5..2afd029 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -30,7 +30,9 @@ namespace {
   struct PartialInliner : public ModulePass {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
     static char ID; // Pass identification, replacement for typeid
-    PartialInliner() : ModulePass(ID) {}
+    PartialInliner() : ModulePass(ID) {
+      initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module& M);
 
@@ -41,7 +43,7 @@ namespace {
 
 char PartialInliner::ID = 0;
 INITIALIZE_PASS(PartialInliner, "partial-inliner",
-                "Partial Inliner", false, false);
+                "Partial Inliner", false, false)
 
 ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
 
@@ -67,7 +69,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
     return 0;
 
   // Clone the function, so that we can hack away on it.
- ValueMap<const Value*, Value*> VMap; + ValueToValueMapTy VMap; Function* duplicateFunction = CloneFunction(F, VMap, /*ModuleLevelChanges=*/false); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); diff --git a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp deleted file mode 100644 index 4a99a41..0000000 --- a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp +++ /dev/null @@ -1,216 +0,0 @@ -//===-- PartialSpecialization.cpp - Specialize for common constants--------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass finds function arguments that are often a common constant and -// specializes a version of the called function for that constant. -// -// This pass simply does the cloning for functions it specializes. It depends -// on IPSCCP and DAE to clean up the results. -// -// The initial heuristic favors constant arguments that are used in control -// flow. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "partialspecialization" -#include "llvm/Transforms/IPO.h" -#include "llvm/Constant.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Support/CallSite.h" -#include "llvm/ADT/DenseSet.h" -#include <map> -using namespace llvm; - -STATISTIC(numSpecialized, "Number of specialized functions created"); -STATISTIC(numReplaced, "Number of callers replaced by specialization"); - -// Maximum number of arguments markable interested -static const int MaxInterests = 6; - -// Call must be used at least occasionally -static const int CallsMin = 5; - -// Must have 10% of calls having the same constant to specialize on -static const double ConstValPercent = .1; - -namespace { - typedef SmallVector<int, MaxInterests> InterestingArgVector; - class PartSpec : public ModulePass { - void scanForInterest(Function&, InterestingArgVector&); - int scanDistribution(Function&, int, std::map<Constant*, int>&); - public : - static char ID; // Pass identification, replacement for typeid - PartSpec() : ModulePass(ID) {} - bool runOnModule(Module &M); - }; -} - -char PartSpec::ID = 0; -INITIALIZE_PASS(PartSpec, "partialspecialization", - "Partial Specialization", false, false); - -// Specialize F by replacing the arguments (keys) in replacements with the -// constants (values). Replace all calls to F with those constants with -// a call to the specialized function. 
Returns the specialized function -static Function* -SpecializeFunction(Function* F, - ValueMap<const Value*, Value*>& replacements) { - // arg numbers of deleted arguments - DenseMap<unsigned, const Argument*> deleted; - for (ValueMap<const Value*, Value*>::iterator - repb = replacements.begin(), repe = replacements.end(); - repb != repe; ++repb) { - Argument const *arg = cast<const Argument>(repb->first); - deleted[arg->getArgNo()] = arg; - } - - Function* NF = CloneFunction(F, replacements, - /*ModuleLevelChanges=*/false); - NF->setLinkage(GlobalValue::InternalLinkage); - F->getParent()->getFunctionList().push_back(NF); - - for (Value::use_iterator ii = F->use_begin(), ee = F->use_end(); - ii != ee; ) { - Value::use_iterator i = ii; - ++ii; - User *U = *i; - CallSite CS(U); - if (CS) { - if (CS.getCalledFunction() == F) { - SmallVector<Value*, 6> args; - // Assemble the non-specialized arguments for the updated callsite. - // In the process, make sure that the specialized arguments are - // constant and match the specialization. If that's not the case, - // this callsite needs to call the original or some other - // specialization; don't change it here. - CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end(); - for (CallSite::arg_iterator ai = as; ai != ae; ++ai) { - DenseMap<unsigned, const Argument*>::iterator delit = deleted.find( - std::distance(as, ai)); - if (delit == deleted.end()) - args.push_back(cast<Value>(ai)); - else { - Constant *ci = dyn_cast<Constant>(ai); - if (!(ci && ci == replacements[delit->second])) - goto next_use; - } - } - Value* NCall; - if (CallInst *CI = dyn_cast<CallInst>(U)) { - NCall = CallInst::Create(NF, args.begin(), args.end(), - CI->getName(), CI); - cast<CallInst>(NCall)->setTailCall(CI->isTailCall()); - cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv()); - } else { - InvokeInst *II = cast<InvokeInst>(U); - NCall = InvokeInst::Create(NF, II->getNormalDest(), - II->getUnwindDest(), - args.begin(), args.end(), - II->getName(), II); - cast<InvokeInst>(NCall)->setCallingConv(II->getCallingConv()); - } - CS.getInstruction()->replaceAllUsesWith(NCall); - CS.getInstruction()->eraseFromParent(); - ++numReplaced; - } - } - next_use:; - } - return NF; -} - - -bool PartSpec::runOnModule(Module &M) { - bool Changed = false; - for (Module::iterator I = M.begin(); I != M.end(); ++I) { - Function &F = *I; - if (F.isDeclaration() || F.mayBeOverridden()) continue; - InterestingArgVector interestingArgs; - scanForInterest(F, interestingArgs); - - // Find the first interesting Argument that we can specialize on - // If there are multiple interesting Arguments, then those will be found - // when processing the cloned function. - bool breakOuter = false; - for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) { - std::map<Constant*, int> distribution; - int total = scanDistribution(F, interestingArgs[x], distribution); - if (total > CallsMin) - for (std::map<Constant*, int>::iterator ii = distribution.begin(), - ee = distribution.end(); ii != ee; ++ii) - if (total > ii->second && ii->first && - ii->second > total * ConstValPercent) { - ValueMap<const Value*, Value*> m; - Function::arg_iterator arg = F.arg_begin(); - for (int y = 0; y < interestingArgs[x]; ++y) - ++arg; - m[&*arg] = ii->first; - SpecializeFunction(&F, m); - ++numSpecialized; - breakOuter = true; - Changed = true; - } - } - } - return Changed; -} - -/// scanForInterest - This function decides which arguments would be worth -/// specializing on. 
-void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) { - for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end(); - ii != ee; ++ii) { - for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end(); - ui != ue; ++ui) { - - bool interesting = false; - User *U = *ui; - if (isa<CmpInst>(U)) interesting = true; - else if (isa<CallInst>(U)) - interesting = ui->getOperand(0) == ii; - else if (isa<InvokeInst>(U)) - interesting = ui->getOperand(0) == ii; - else if (isa<SwitchInst>(U)) interesting = true; - else if (isa<BranchInst>(U)) interesting = true; - - if (interesting) { - args.push_back(std::distance(F.arg_begin(), ii)); - break; - } - } - } -} - -/// scanDistribution - Construct a histogram of constants for arg of F at arg. -int PartSpec::scanDistribution(Function& F, int arg, - std::map<Constant*, int>& dist) { - bool hasIndirect = false; - int total = 0; - for (Value::use_iterator ii = F.use_begin(), ee = F.use_end(); - ii != ee; ++ii) { - User *U = *ii; - CallSite CS(U); - if (CS && CS.getCalledFunction() == &F) { - ++dist[dyn_cast<Constant>(CS.getArgument(arg))]; - ++total; - } else - hasIndirect = true; - } - - // Preserve the original address taken function even if all other uses - // will be specialized. - if (hasIndirect) ++total; - return total; -} - -ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); } diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp index 09ac76f..d91c2c4 100644 --- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -37,7 +37,9 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized"); namespace { struct PruneEH : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - PruneEH() : CallGraphSCCPass(ID) {} + PruneEH() : CallGraphSCCPass(ID) { + initializePruneEHPass(*PassRegistry::getPassRegistry()); + } // runOnSCC - Analyze the SCC, performing the transformation if possible. 
bool runOnSCC(CallGraphSCC &SCC); @@ -48,8 +50,11 @@ namespace { } char PruneEH::ID = 0; -INITIALIZE_PASS(PruneEH, "prune-eh", - "Remove unused exception handling info", false, false); +INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) Pass *llvm::createPruneEHPass() { return new PruneEH(); } diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp index ee10ad0..b5f09ec 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -29,7 +29,9 @@ namespace { class StripDeadPrototypesPass : public ModulePass { public: static char ID; // Pass identification, replacement for typeid - StripDeadPrototypesPass() : ModulePass(ID) { } + StripDeadPrototypesPass() : ModulePass(ID) { + initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); }; @@ -37,7 +39,7 @@ public: char StripDeadPrototypesPass::ID = 0; INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes", - "Strip Unused Function Prototypes", false, false); + "Strip Unused Function Prototypes", false, false) bool StripDeadPrototypesPass::runOnModule(Module &M) { bool MadeChange = false; diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp index 20b7b8f..a690765 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -39,7 +39,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripSymbols(bool ODI = false) - : ModulePass(ID), OnlyDebugInfo(ODI) {} + : ModulePass(ID), OnlyDebugInfo(ODI) { + initializeStripSymbolsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -52,7 +54,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripNonDebugSymbols() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -65,7 +69,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripDebugDeclare() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -78,7 +84,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripDeadDebugInfo() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -90,7 +98,7 @@ namespace { char StripSymbols::ID = 0; INITIALIZE_PASS(StripSymbols, "strip", - "Strip all symbols from a module", false, false); + "Strip all symbols from a module", false, false) ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { return new StripSymbols(OnlyDebugInfo); @@ -99,7 +107,7 @@ ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { char StripNonDebugSymbols::ID = 0; INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug", "Strip all symbols, except dbg symbols, from a module", - false, false); + false, false) ModulePass *llvm::createStripNonDebugSymbolsPass() { return new StripNonDebugSymbols(); @@ -107,7 +115,7 @@ 
ModulePass *llvm::createStripNonDebugSymbolsPass() { char StripDebugDeclare::ID = 0; INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare", - "Strip all llvm.dbg.declare intrinsics", false, false); + "Strip all llvm.dbg.declare intrinsics", false, false) ModulePass *llvm::createStripDebugDeclarePass() { return new StripDebugDeclare(); @@ -115,7 +123,7 @@ ModulePass *llvm::createStripDebugDeclarePass() { char StripDeadDebugInfo::ID = 0; INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info", - "Strip debug info for unused symbols", false, false); + "Strip debug info for unused symbols", false, false) ModulePass *llvm::createStripDeadDebugInfoPass() { return new StripDeadDebugInfo(); diff --git a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp index b82b03f..584deac 100644 --- a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp @@ -50,7 +50,9 @@ namespace { virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid - SRETPromotion() : CallGraphSCCPass(ID) {} + SRETPromotion() : CallGraphSCCPass(ID) { + initializeSRETPromotionPass(*PassRegistry::getPassRegistry()); + } private: CallGraphNode *PromoteReturn(CallGraphNode *CGN); @@ -61,8 +63,11 @@ namespace { } char SRETPromotion::ID = 0; -INITIALIZE_PASS(SRETPromotion, "sretpromotion", - "Promote sret arguments to multiple ret values", false, false); +INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion", + "Promote sret arguments to multiple ret values", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(SRETPromotion, "sretpromotion", + "Promote sret arguments to multiple ret values", false, false) Pass *llvm::createStructRetPromotionPass() { return new SRETPromotion(); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h index 6f9609c..9c2969c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -81,7 +81,9 @@ public: BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {} + InstCombiner() : FunctionPass(ID), TD(0), Builder(0) { + initializeInstCombinerPass(*PassRegistry::getPassRegistry()); + } public: virtual bool runOnFunction(Function &F); @@ -143,6 +145,8 @@ public: ConstantInt *RHS); Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); + Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, ICmpInst::Predicate Pred, Value *TheAdd); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, @@ -284,9 +288,16 @@ public: private: - /// SimplifyCommutative - This performs a few simplifications for - /// commutative operators. - bool SimplifyCommutative(BinaryOperator &I); + /// SimplifyAssociativeOrCommutative - This performs a few simplifications for + /// operators which are associative or commutative. 
+ bool SimplifyAssociativeOrCommutative(BinaryOperator &I); + + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations + /// which some other binary operation distributes over either by factorizing + /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this + /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is + /// a win). Returns the simplified value, or null if it didn't simplify. + Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. @@ -310,10 +321,7 @@ private: // into the PHI (which is only possible if all operands to the PHI are // constants). // - // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms - // that would normally be unprofitable because they strongly encourage jump - // threading. - Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); + Instruction *FoldOpIntoPhi(Instruction &I); // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" // operator and they all are only used by the PHI, PHI together their @@ -339,10 +347,6 @@ private: Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); - - unsigned GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign = 0); - }; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 4d2c89e..c36a955 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -84,43 +84,37 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { } Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), TD)) return ReplaceInstUsesWith(I, V); - - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) { - // X + (signbit) --> X ^ signbit - const APInt& Val = CI->getValue(); - uint32_t BitWidth = Val.getBitWidth(); - if (Val == APInt::getSignBit(BitWidth)) - return BinaryOperator::CreateXor(LHS, RHS); - - // See if SimplifyDemandedBits can simplify this. This handles stuff like - // (X & 254)+1 -> (X&254)|1 - if (SimplifyDemandedInstructionBits(I)) - return &I; - - // zext(bool) + C -> bool ? C + 1 : C - if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) - if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) - return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - } + // (A*B)+(A*C) -> A*(B+C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); - if (isa<PHINode>(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // X + (signbit) --> X ^ signbit + const APInt &Val = CI->getValue(); + if (Val.isSignBit()) + return BinaryOperator::CreateXor(LHS, RHS); + + // See if SimplifyDemandedBits can simplify this. This handles stuff like + // (X & 254)+1 -> (X&254)|1 + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // zext(bool) + C -> bool ? 
C + 1 : C + if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) + if (ZI->getSrcTy()->isIntegerTy(1)) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - ConstantInt *XorRHS = 0; - Value *XorLHS = 0; - if (isa<ConstantInt>(RHSC) && - match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { + Value *XorLHS = 0; ConstantInt *XorRHS = 0; + if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); - const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); + const APInt &RHSVal = CI->getValue(); unsigned ExtendAmt = 0; // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. @@ -130,13 +124,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { else if (XorRHS->getValue().isPowerOf2()) ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; } - + if (ExtendAmt) { APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); if (!MaskedValueIsZero(XorLHS, Mask)) ExtendAmt = 0; } - + if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); @@ -145,34 +139,28 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } + if (isa<Constant>(RHS) && isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(LHS, RHS); - if (I.getType()->isIntegerTy()) { - // X + X --> X << 1 - if (LHS == RHS) - return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); - - if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) { - if (RHSI->getOpcode() == Instruction::Sub) - if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B - return ReplaceInstUsesWith(I, RHSI->getOperand(0)); - } - if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) { - if (LHSI->getOpcode() == Instruction::Sub) - if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B - return ReplaceInstUsesWith(I, LHSI->getOperand(0)); - } + // X + X --> X << 1 + if (LHS == RHS) { + BinaryOperator *New = + BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); + New->setHasNoSignedWrap(I.hasNoSignedWrap()); + New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return New; } // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVectorTy()) { - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); - return BinaryOperator::CreateNeg(NewAdd); - } + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); } return BinaryOperator::CreateSub(RHS, LHSV); @@ -199,11 +187,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (dyn_castFoldableMul(RHS, C2) == LHS) return BinaryOperator::CreateMul(LHS, AddOne(C2)); - // X + ~X --> -1 since ~X = -X-1 - if (match(LHS, m_Not(m_Specific(RHS))) || - match(RHS, m_Not(m_Specific(LHS)))) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - // A+B --> A|B iff A and B have no bits set in common. 
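  // e.g. (X & 0xF0) + (Y & 0x0F) --> (X & 0xF0) | (Y & 0x0F): no bit position
  // can receive a carry, so the add and the or agree on every bit.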
if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); @@ -222,7 +205,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVectorTy()) { + { Value *W, *X, *Y, *Z; if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { @@ -251,24 +234,22 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // (X & FF00) + xx00 -> (X+xx00) & FF00 if (LHS->hasOneUse() && - match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = ConstantExpr::getAnd(CRHS, C2); - if (Anded == CRHS) { - // See if all bits from the first bit set in the Add RHS up are included - // in the mask. First, get the rightmost bit. - const APInt &AddRHSV = CRHS->getValue(); - - // Form a mask of all bits from the lowest bit added through the top. - APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); - - // See if the and mask includes all of these bits. - APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); - - if (AddRHSHighBits == AddRHSHighBitsAnd) { - // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); - return BinaryOperator::CreateAnd(NewAdd, C2); - } + match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && + CRHS->getValue() == (CRHS->getValue() & C2->getValue())) { + // See if all bits from the first bit set in the Add RHS up are included + // in the mask. First, get the rightmost bit. + const APInt &AddRHSV = CRHS->getValue(); + + // Form a mask of all bits from the lowest bit added through the top. + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); + + // See if the and mask includes all of these bits. + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); + + if (AddRHSHighBits == AddRHSHighBitsAnd) { + // Okay, the xform is safe. Insert the new add pronto. + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + return BinaryOperator::CreateAnd(NewAdd, C2); } } @@ -293,12 +274,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Can we fold the add into the argument of the select? // We check both true and false select arguments for a matching subtract. - if (match(FV, m_Zero()) && - match(TV, m_Sub(m_Value(N), m_Specific(A)))) + if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the true select value. return SelectInst::Create(SI->getCondition(), N, A); - if (match(TV, m_Zero()) && - match(FV, m_Sub(m_Value(N), m_Specific(A)))) + + if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the false select value. return SelectInst::Create(SI->getCondition(), A, N); } @@ -342,7 +322,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (Constant *RHSC = dyn_cast<Constant>(RHS)) { @@ -424,6 +404,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); Value *Result = Constant::getNullValue(IntPtrTy); + // If the GEP is inbounds, we know that none of the addressing operations will + // overflow in an unsigned sense. + bool isInBounds = cast<GEPOperator>(GEP)->isInBounds(); + // Build a mask for high order bits. 
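  // e.g. with 32-bit pointers IntPtrWidth is 32 and PtrSizeMask is 0xFFFFFFFF,
  // so 64-bit element sizes are truncated to the width of intptr_t before
  // they are folded into the running offset.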
unsigned IntPtrWidth = TD.getPointerSizeInBits(); uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); @@ -439,16 +423,16 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { if (const StructType *STy = dyn_cast<StructType>(*GTI)) { Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - Result = Builder->CreateAdd(Result, - ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".offs"); + if (Size) + Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); continue; } Constant *Scale = ConstantInt::get(IntPtrTy, Size); Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale); + Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); // Emit an add instruction. Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); continue; @@ -457,9 +441,9 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { if (Op->getType() != IntPtrTy) Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); if (Size != 1) { - Constant *Scale = ConstantInt::get(IntPtrTy, Size); // We'll let instcombine(mul) convert this to a shl if possible. - Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); + Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".idx", isInBounds /*NUW*/); } // Emit an add instruction. @@ -545,8 +529,13 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Op0 == Op1) // sub X, X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); + + // (A*B)-(A*C) -> A*(B-C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. if (Value *V = dyn_castNegVal(Op1)) { @@ -556,18 +545,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return Res; } - if (isa<UndefValue>(Op0)) - return ReplaceInstUsesWith(I, Op0); // undef - X -> undef - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); // X - undef -> undef if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(Op0, Op1); + + // Replace (-1 - A) with (~A). + if (match(Op0, m_AllOnes())) + return BinaryOperator::CreateNot(Op1); if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { - // Replace (-1 - A) with (~A). - if (C->isAllOnesValue()) - return BinaryOperator::CreateNot(Op1); - // C - ~X == X + (1+C) Value *X = 0; if (match(Op1, m_Not(m_Value(X)))) @@ -576,29 +561,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) if (C->isZero()) { - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) { - if (SI->getOpcode() == Instruction::LShr) { - if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert AShr. 
- return BinaryOperator::Create(Instruction::AShr, - SI->getOperand(0), CU, SI->getName()); - } - } - } else if (SI->getOpcode() == Instruction::AShr) { - if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert LShr. - return BinaryOperator::CreateLShr( - SI->getOperand(0), CU, SI->getName()); - } - } - } - } + Value *X; ConstantInt *CI; + if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) && + // Verify we are shifting out everything but the sign bit. + CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) + return BinaryOperator::CreateAShr(X, CI); + + if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) && + // Verify we are shifting out everything but the sign bit. + CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) + return BinaryOperator::CreateLShr(X, CI); } // Try to fold constant sub into select arguments. @@ -608,86 +580,80 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // C - zext(bool) -> bool ? C - 1 : C if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) - if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); + + // C-(X+C2) --> (C-C2)-X + ConstantInt *C2; + if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) + return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); } - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op1I->getOpcode() == Instruction::Add) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(1), - I.getName()); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(0), - I.getName()); - else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { - if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) - // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::CreateSub( - ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); - } + + { Value *Y; + // X-(X+Y) == -Y X-(Y+X) == -Y + if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || + match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) + return BinaryOperator::CreateNeg(Y); + + // (X-Y)-X == -Y + if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) + return BinaryOperator::CreateNeg(Y); + } + + if (Op1->hasOneUse()) { + Value *X = 0, *Y = 0, *Z = 0; + Constant *C = 0; + ConstantInt *CI = 0; + + // (X - (Y - Z)) --> (X + (Z - Y)). + if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) + return BinaryOperator::CreateAdd(Op0, + Builder->CreateSub(Z, Y, Op1->getName())); + + // (X - (X & Y)) --> (X & ~Y) + // + if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) || + match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) + return BinaryOperator::CreateAnd(Op0, + Builder->CreateNot(Y, Y->getName() + ".not")); + + // 0 - (X sdiv C) -> (X sdiv -C) + if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && + match(Op0, m_Zero())) + return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); + + // 0 - (X << Y) -> (-X << Y) when X is freely negatable. 
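+    // e.g. 0 - ((0 - A) << B) --> A << B, since dyn_castNegVal recognises
+    // 0 - A as freely negatable and negation commutes with shl modulo 2^n.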
+ if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero())) + if (Value *XNeg = dyn_castNegVal(X)) + return BinaryOperator::CreateShl(XNeg, Y); + + // X - X*C --> X * (1-C) + if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) { + Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); + return BinaryOperator::CreateMul(Op0, CP1); } - if (Op1I->hasOneUse()) { - // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression - // is not used by anyone else... - // - if (Op1I->getOpcode() == Instruction::Sub) { - // Swap the two operands of the subexpr... - Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); - Op1I->setOperand(0, IIOp1); - Op1I->setOperand(1, IIOp0); - - // Create the new top level add instruction... - return BinaryOperator::CreateAdd(Op0, Op1); - } - - // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... - // - if (Op1I->getOpcode() == Instruction::And && - (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { - Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); - - Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); - return BinaryOperator::CreateAnd(Op0, NewNot); - } - - // 0 - (X sdiv C) -> (X sdiv -C) - if (Op1I->getOpcode() == Instruction::SDiv) - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) - if (CSI->isZero()) - if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1))) - return BinaryOperator::CreateSDiv(Op1I->getOperand(0), - ConstantExpr::getNeg(DivRHS)); - - // 0 - (C << X) -> (-C << X) - if (Op1I->getOpcode() == Instruction::Shl) - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) - if (CSI->isZero()) - if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0))) - return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1)); - - // X - X*C --> X * (1-C) - ConstantInt *C2 = 0; - if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = - ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), - C2); - return BinaryOperator::CreateMul(Op0, CP1); - } + // X - X<<C --> X * (1-(1<<C)) + if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) { + Constant *One = ConstantInt::get(I.getType(), 1); + C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); + return BinaryOperator::CreateMul(Op0, C); } - } - - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (Op0I->getOpcode() == Instruction::Add) { - if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(1)); - else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(0)); - } else if (Op0I->getOpcode() == Instruction::Sub) { - if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return BinaryOperator::CreateNeg(Op0I->getOperand(1), - I.getName()); + + // X - A*-B -> X + A*B + // X - -A*B -> X + A*B + Value *A, *B; + if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || + match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) + return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); + + // X - A*CI -> X + A*-CI + // X - CI*A -> X + A*-CI + if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || + match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) { + Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); + return BinaryOperator::CreateAdd(Op0, NewMul); } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 19a05bf..b6b6b84 100644 --- 
a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -172,7 +172,9 @@ static Value *getFCmpValue(bool isordered, unsigned code,
   case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
   case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
   case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
-  case 7: return ConstantInt::getTrue(LHS->getContext());
+  case 7:
+    if (!isordered) return ConstantInt::getTrue(LHS->getContext());
+    Pred = FCmpInst::FCMP_ORD; break;
   }
   return Builder->CreateFCmp(Pred, LHS, RHS);
 }
@@ -207,15 +209,26 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
     }
     break;
   case Instruction::Or:
-    if (Together == AndRHS) // (X | C) & C --> C
-      return ReplaceInstUsesWith(TheAnd, AndRHS);
-
-    if (Op->hasOneUse() && Together != OpRHS) {
-      // (X | C1) & C2 --> (X | (C1&C2)) & C2
-      Value *Or = Builder->CreateOr(X, Together);
-      Or->takeName(Op);
-      return BinaryOperator::CreateAnd(Or, AndRHS);
+    if (Op->hasOneUse()){
+      if (Together != OpRHS) {
+        // (X | C1) & C2 --> (X | (C1&C2)) & C2
+        Value *Or = Builder->CreateOr(X, Together);
+        Or->takeName(Op);
+        return BinaryOperator::CreateAnd(Or, AndRHS);
+      }
+
+      ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+      if (TogetherCI && !TogetherCI->isZero()){
+        // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+        // NOTE: This reduces the number of bits set in the & mask, which
+        // can expose opportunities for store narrowing.
+        Together = ConstantExpr::getXor(AndRHS, Together);
+        Value *And = Builder->CreateAnd(X, Together);
+        And->takeName(Op);
+        return BinaryOperator::CreateOr(And, OpRHS);
+      }
    }
+
+    break;
   case Instruction::Add:
     if (Op->hasOneUse()) {
@@ -261,10 +274,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
       ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
                                          AndRHS->getValue() & ShlMask);
-      if (CI->getValue() == ShlMask) {
-        // Masking out bits that the shift already masks
+      if (CI->getValue() == ShlMask)
+        // Masking out bits that the shift already masks.
        return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
-      } else if (CI != AndRHS) {                  // Reducing bits set in and.
+
+      if (CI != AndRHS) {                         // Reducing bits set in and.
         TheAnd.setOperand(1, CI);
         return &TheAnd;
       }
@@ -281,10 +295,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
       ConstantInt *CI = ConstantInt::get(Op->getContext(),
                                          AndRHS->getValue() & ShrMask);
-      if (CI->getValue() == ShrMask) {
-        // Masking out bits that the shift already masks.
+      if (CI->getValue() == ShrMask)
+        // Masking out bits that the shift already masks.
        return ReplaceInstUsesWith(TheAnd, Op);
-      } else if (CI != AndRHS) {
+
+      if (CI != AndRHS) {
         TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
         return &TheAnd;
       }
@@ -434,6 +449,270 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
   return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
 }
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it has been proven that (A & C) == C. This
+/// is trivial if C == A or C == 0. If both A and C are constants, this
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares that the comparison is true only
+/// if (A & B) == A, or all bits of A are set in B.
+///   Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares that the comparison is true only
+/// if (A & B) == 0, or all bits of A are cleared in B.
+///   Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares that (A & B) == C and C might or might not
+/// contain any number of one bits and zero bits.
+///   Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The part "Not" means that in the above descriptions "==" should be
+/// replaced by "!=".
+///   Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following is equivalent:
+///    (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+///    (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+  FoldMskICmp_AMask_AllOnes = 1,
+  FoldMskICmp_AMask_NotAllOnes = 2,
+  FoldMskICmp_BMask_AllOnes = 4,
+  FoldMskICmp_BMask_NotAllOnes = 8,
+  FoldMskICmp_Mask_AllZeroes = 16,
+  FoldMskICmp_Mask_NotAllZeroes = 32,
+  FoldMskICmp_AMask_Mixed = 64,
+  FoldMskICmp_AMask_NotMixed = 128,
+  FoldMskICmp_BMask_Mixed = 256,
+  FoldMskICmp_BMask_NotMixed = 512
+};
+
+/// return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+                                    ICmpInst::Predicate SCC)
+{
+  ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+  ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+  bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+  bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+                    ACst->getValue().isPowerOf2());
+  bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+                    BCst->getValue().isPowerOf2());
+  unsigned result = 0;
+  if (CCst != 0 && CCst->isZero()) {
+    // if C is zero, then both A and B qualify as mask
+    result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+                          FoldMskICmp_Mask_AllZeroes |
+                          FoldMskICmp_AMask_Mixed |
+                          FoldMskICmp_BMask_Mixed)
+                       : (FoldMskICmp_Mask_NotAllZeroes |
+                          FoldMskICmp_Mask_NotAllZeroes |
+                          FoldMskICmp_AMask_NotMixed |
+                          FoldMskICmp_BMask_NotMixed));
+    if (icmp_abit)
+      result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+                            FoldMskICmp_AMask_NotMixed)
+                         : (FoldMskICmp_AMask_AllOnes |
+                            FoldMskICmp_AMask_Mixed));
+    if (icmp_bbit)
+      result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+                            FoldMskICmp_BMask_NotMixed)
+                         : (FoldMskICmp_BMask_AllOnes |
+                            FoldMskICmp_BMask_Mixed));
+    return result;
+  }
+  if (A == C) {
+    result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+                          FoldMskICmp_AMask_Mixed)
+                       : (FoldMskICmp_AMask_NotAllOnes |
+                          FoldMskICmp_AMask_NotMixed));
+    if (icmp_abit)
+      result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+                            FoldMskICmp_AMask_NotMixed)
+                         : (FoldMskICmp_Mask_AllZeroes |
+                            FoldMskICmp_AMask_Mixed));
+  }
+  else if (ACst != 0 && CCst != 0 &&
+           ConstantExpr::getAnd(ACst, CCst) == CCst) {
+    result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+                       : FoldMskICmp_AMask_NotMixed);
+  }
+  if (B == C)
+  {
+    result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+                          FoldMskICmp_BMask_Mixed)
+                       : (FoldMskICmp_BMask_NotAllOnes |
+                          FoldMskICmp_BMask_NotMixed));
+    if (icmp_bbit)
+      result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+                            FoldMskICmp_BMask_NotMixed)
+                         : (FoldMskICmp_Mask_AllZeroes |
+                            FoldMskICmp_BMask_Mixed));
+  }
+  else if (BCst != 0 && CCst != 0 &&
+           ConstantExpr::getAnd(BCst, CCst) == CCst) {
+    result |= (icmp_eq ?
FoldMskICmp_BMask_Mixed + : FoldMskICmp_BMask_NotMixed); + } + return result; +} + +/// foldLogOpOfMaskedICmpsHelper: +/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) +/// return the set of pattern classes (from MaskedICmpType) +/// that both LHS and RHS satisfy +static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, + Value*& B, Value*& C, + Value*& D, Value*& E, + ICmpInst *LHS, ICmpInst *RHS) { + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0; + if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0; + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; + // vectors are not (yet?) supported + if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; + + // Here comes the tricky part: + // LHS might be of the form L11 & L12 == X, X == L21 & L22, + // and L11 & L12 == L21 & L22. The same goes for RHS. + // Now we must find those components L** and R**, that are equal, so + // that we can extract the parameters A, B, C, D, and E for the canonical + // above. + Value *L1 = LHS->getOperand(0); + Value *L2 = LHS->getOperand(1); + Value *L11,*L12,*L21,*L22; + if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { + if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + L21 = L22 = 0; + } + else { + if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) + return 0; + std::swap(L1, L2); + L21 = L22 = 0; + } + + Value *R1 = RHS->getOperand(0); + Value *R2 = RHS->getOperand(1); + Value *R11,*R12; + bool ok = false; + if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { + A = R11; D = R12; E = R2; ok = true; + } + else + if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + A = R12; D = R11; E = R2; ok = true; + } + } + if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { + A = R11; D = R12; E = R1; ok = true; + } + else + if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + A = R12; D = R11; E = R1; ok = true; + } + else + return 0; + } + if (!ok) + return 0; + + if (L11 == A) { + B = L12; C = L2; + } + else if (L12 == A) { + B = L11; C = L2; + } + else if (L21 == A) { + B = L22; C = L1; + } + else if (L22 == A) { + B = L21; C = L1; + } + + unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC); + unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC); + return left_type & right_type; +} +/// foldLogOpOfMaskedICmps: +/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) +/// into a single (icmp(A & X) ==/!= Y) +static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, + ICmpInst::Predicate NEWCC, + llvm::InstCombiner::BuilderTy* Builder) { + Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; + unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS); + if (mask == 0) return 0; + + if (NEWCC == ICmpInst::ICMP_NE) + mask >>= 1; // treat "Not"-states as normal states + + if (mask & FoldMskICmp_Mask_AllZeroes) { + // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) + // -> (icmp eq (A & (B|D)), 0) + Value* newOr = Builder->CreateOr(B, D); + Value* newAnd = Builder->CreateAnd(A, newOr); + // we can't use C as zero, because we might actually handle + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // with B and D, having a single bit set + Value* zero = Constant::getNullValue(A->getType()); + return Builder->CreateICmp(NEWCC, 
newAnd, zero); + } + else if (mask & FoldMskICmp_BMask_AllOnes) { + // (icmp eq (A & B), B) & (icmp eq (A & D), D) + // -> (icmp eq (A & (B|D)), (B|D)) + Value* newOr = Builder->CreateOr(B, D); + Value* newAnd = Builder->CreateAnd(A, newOr); + return Builder->CreateICmp(NEWCC, newAnd, newOr); + } + else if (mask & FoldMskICmp_AMask_AllOnes) { + // (icmp eq (A & B), A) & (icmp eq (A & D), A) + // -> (icmp eq (A & (B&D)), A) + Value* newAnd1 = Builder->CreateAnd(B, D); + Value* newAnd = Builder->CreateAnd(A, newAnd1); + return Builder->CreateICmp(NEWCC, newAnd, A); + } + else if (mask & FoldMskICmp_BMask_Mixed) { + // (icmp eq (A & B), C) & (icmp eq (A & D), E) + // We already know that B & C == C && D & E == E. + // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of + // C and E, which are shared by both the mask B and the mask D, don't + // contradict, then we can transform to + // -> (icmp eq (A & (B|D)), (C|E)) + // Currently, we only handle the case of B, C, D, and E being constant. + ConstantInt *BCst = dyn_cast<ConstantInt>(B); + if (BCst == 0) return 0; + ConstantInt *DCst = dyn_cast<ConstantInt>(D); + if (DCst == 0) return 0; + // we can't simply use C and E, because we might actually handle + // (icmp ne (A & B), B) & (icmp eq (A & D), D) + // with B and D, having a single bit set + + ConstantInt *CCst = dyn_cast<ConstantInt>(C); + if (CCst == 0) return 0; + if (LHS->getPredicate() != NEWCC) + CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) ); + ConstantInt *ECst = dyn_cast<ConstantInt>(E); + if (ECst == 0) return 0; + if (RHS->getPredicate() != NEWCC) + ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) ); + ConstantInt* MCst = dyn_cast<ConstantInt>( + ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst), + ConstantExpr::getXor(CCst, ECst)) ); + // if there is a conflict we should actually return a false for the + // whole construct + if (!MCst->isZero()) + return 0; + Value *newOr1 = Builder->CreateOr(B, D); + Value *newOr2 = ConstantExpr::getOr(CCst, ECst); + Value *newAnd = Builder->CreateAnd(A, newOr1); + return Builder->CreateICmp(NEWCC, newAnd, newOr2); + } + return 0; +} + /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); @@ -451,6 +730,10 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } + + // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder)) + return V; // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
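The hook above generalizes the old power-of-two special case that a later hunk deletes: any pair of eq/ne compares of masked values can merge once the mask relationship is classified. A brute-force sketch of the Mask_AllZeroes case (illustrative C++ with arbitrary mask constants, not patch code):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t B = 0x31, D = 0x0c;  // arbitrary mask constants
  for (uint32_t a = 0; a < 256; ++a) {
    bool separate = ((a & B) == 0) && ((a & D) == 0);
    bool merged   = (a & (B | D)) == 0;
    assert(separate == merged);  // (A&B)==0 && (A&D)==0  <=>  (A&(B|D))==0
  }
  return 0;
}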
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); @@ -472,22 +755,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) --> - // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT - if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Value *Op1 = 0, *Op2 = 0; - ConstantInt *CI1 = 0, *CI2 = 0; - if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && - match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { - if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && - CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { - Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); - Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); - return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr); - } - } - } } // From here on, we only handle: @@ -712,12 +979,16 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { Instruction *InstCombiner::visitAnd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyAndInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // (A|B)&(A|C) -> A|(B&C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -725,7 +996,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); - APInt NotAndRHS(~AndRHSMask); // Optimize a variety of ((val OP C1) & C2) combinations... if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { @@ -734,10 +1004,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { switch (Op0I->getOpcode()) { default: break; case Instruction::Xor: - case Instruction::Or: + case Instruction::Or: { // If the mask is only needed on one incoming arm, push it up. if (!Op0I->hasOneUse()) break; + APInt NotAndRHS(~AndRHSMask); if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { // Not masking anything out for the LHS, move to RHS. Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, @@ -753,6 +1024,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } break; + } case Instruction::Add: // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 @@ -772,14 +1044,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS // has 1's for all bits that the subtraction with A might affect. - if (Op0I->hasOneUse()) { + if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) { uint32_t BitWidth = AndRHSMask.getBitWidth(); uint32_t Zeros = AndRHSMask.countLeadingZeros(); APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); - ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); - if (!(A && A->isZero()) && // avoid infinite recursion. 
-            MaskedValueIsZero(Op0LHS, Mask)) {
+        if (MaskedValueIsZero(Op0LHS, Mask)) {
           Value *NewNeg = Builder->CreateNeg(Op0RHS);
           return BinaryOperator::CreateAnd(NewNeg, AndRHS);
         }
@@ -797,39 +1067,25 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       }
       break;
     }
-
+
     if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
       if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
         return Res;
-  } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
-    // If this is an integer truncation or change from signed-to-unsigned, and
-    // if the source is an and/or with immediate, transform it. This
-    // frequently occurs for bitfield accesses.
-    if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
-      if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
-          CastOp->getNumOperands() == 2)
-        if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
-          if (CastOp->getOpcode() == Instruction::And) {
-            // Change: and (cast (and X, C1) to T), C2
-            // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
-            // This will fold the two constants together, which may allow
-            // other simplifications.
-            Value *NewCast = Builder->CreateTruncOrBitCast(
-              CastOp->getOperand(0), I.getType(),
-              CastOp->getName()+".shrunk");
-            // trunc_or_bitcast(C1)&C2
-            Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
-            C3 = ConstantExpr::getAnd(C3, AndRHS);
-            return BinaryOperator::CreateAnd(NewCast, C3);
-          } else if (CastOp->getOpcode() == Instruction::Or) {
-            // Change: and (cast (or X, C1) to T), C2
-            // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
-            Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
-            if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
-              // trunc(C1)&C2
-              return ReplaceInstUsesWith(I, AndRHS);
-          }
-        }
+  }
+
+  // If this is an integer truncation, and if the source is an 'and' with
+  // immediate, transform it. This frequently occurs for bitfield accesses.
+  {
+    Value *X = 0; ConstantInt *YC = 0;
+    if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+      // Change: and (trunc (and X, YC) to T), C2
+      // into  : and (trunc X to T), trunc(YC) & C2
+      // This will fold the two constants together, which may allow
+      // other simplifications.
+      Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+      Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+      C3 = ConstantExpr::getAnd(C3, AndRHS);
+      return BinaryOperator::CreateAnd(NewCast, C3);
     }
   }
@@ -851,7 +1107,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
                                       I.getName()+".demorgan");
     return BinaryOperator::CreateNot(Or);
   }
-
+
   {
     Value *A = 0, *B = 0, *C = 0, *D = 0;
     // (A|B) & ~(A&B) -> A^B
@@ -884,7 +1140,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         cast<BinaryOperator>(Op1)->swapOperands();
         std::swap(A, B);
       }
-      if (A == Op0)                                // A&(A^B) -> A & ~B
+      // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+      // A is originally -1 (or a vector of -1 and undefs), then we enter
+      // an endless loop. By checking that A is non-constant we ensure that
+      // we will never get to the loop.
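The identity itself always holds; only the rewrite loops when A is the all-ones constant, because A ^ B is then already the canonical spelling of ~B and CreateNot would reproduce the same xor. A brute-force check of the underlying identity (illustrative, not patch code):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; ++b)
      assert((a & (a ^ b)) == (a & ~b));  // A&(A^B) == A&~B
  return 0;
}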
+ if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); } @@ -1160,7 +1420,12 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } - + + // handle (roughly): + // (icmp ne (A & B), C) | (icmp ne (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder)) + return V; + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); @@ -1173,24 +1438,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) --> - // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT - if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { - Value *Op1 = 0, *Op2 = 0; - ConstantInt *CI1 = 0, *CI2 = 0; - if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && - match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { - if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && - CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { - Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); - Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); - return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr); - } - } - } } - + + // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) + // iff C2 + CA == C1. + if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) { + ConstantInt *AddCst; + if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst)))) + if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue()) + return Builder->CreateICmpULE(Val, LHSCst); + } + // From here on, we only handle: // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. if (Val != Val2) return 0; @@ -1429,12 +1687,16 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, } Instruction *InstCombiner::visitOr(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyOrInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // (A&B)|(A&C) -> A&(B|C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -1481,8 +1743,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 
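Tightening m_Shift to m_LogicalShift keeps MatchBSwap from being tried on arithmetic-shift trees, which can never assemble a byte swap. For reference, the shape being matched, written out for a 16-bit value (illustrative C++, not patch code):

#include <cassert>
#include <cstdint>

static uint16_t bswap16(uint16_t x) {
  // (x >> 8) | (x << 8): the or-of-logical-shifts form MatchBSwap recognizes.
  return static_cast<uint16_t>((x >> 8) | (x << 8));
}

int main() {
  assert(bswap16(0x1234) == 0x3412);
  assert(bswap16(bswap16(0xbeef)) == 0xbeef);  // byte swap is an involution
  return 0;
}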
if (match(Op0, m_Or(m_Value(), m_Value())) || match(Op1, m_Or(m_Value(), m_Value())) || - (match(Op0, m_Shift(m_Value(), m_Value())) && - match(Op1, m_Shift(m_Value(), m_Value())))) { + (match(Op0, m_LogicalShift(m_Value(), m_Value())) && + match(Op1, m_LogicalShift(m_Value(), m_Value())))) { if (Instruction *BSwap = MatchBSwap(I)) return BSwap; } @@ -1509,7 +1771,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *C = 0, *D = 0; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - Value *V1 = 0, *V2 = 0, *V3 = 0; + Value *V1 = 0, *V2 = 0; C1 = dyn_cast<ConstantInt>(C); C2 = dyn_cast<ConstantInt>(D); if (C1 && C2) { // (A & C1)|(B & C2) @@ -1567,25 +1829,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } } - - // Check to see if we have any common things being and'ed. If so, find the - // terms for V1 & (V2|V3). - if (Op0->hasOneUse() || Op1->hasOneUse()) { - V1 = 0; - if (A == B) // (A & C)|(A & D) == A & (C|D) - V1 = A, V2 = C, V3 = D; - else if (A == D) // (A & C)|(B & A) == A & (B|C) - V1 = A, V2 = B, V3 = C; - else if (C == B) // (A & C)|(C & D) == C & (A|D) - V1 = C, V2 = A, V3 = D; - else if (C == D) // (A & C)|(B & C) == C & (A|B) - V1 = C, V2 = A, V3 = B; - - if (V1) { - Value *Or = Builder->CreateOr(V2, V3, "tmp"); - return BinaryOperator::CreateAnd(V1, Or); - } - } // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. // Don't do this for vector select idioms, the code generator doesn't handle @@ -1667,65 +1910,69 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVectorTy()) { - Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); - - if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && - // Only do this if the casts both really cause code to be - // generated. - ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && - ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { - Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - - // If this is or(cast(icmp), cast(icmp)), try to fold this even if the - // cast is otherwise not optimizable. This happens for vector sexts. - if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) - if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) - if (Value *Res = FoldOrOfICmps(LHS, RHS)) - return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - - // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the - // cast is otherwise not optimizable. This happens for vector sexts. - if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) - if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) - if (Value *Res = FoldOrOfFCmps(LHS, RHS)) - return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + CastInst *Op1C = dyn_cast<CastInst>(Op1); + if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? 
+ const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && + // Only do this if the casts both really cause code to be + // generated. + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); } + } + } + + // Note: If we've gotten to the point of visiting the outer OR, then the + // inner one couldn't be simplified. If it was a constant, then it won't + // be simplified by a later pass either, so we try swapping the inner/outer + // ORs in the hopes that we'll be able to simplify it this way. + // (X|C) | V --> (X|V) | C + if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) && + match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { + Value *Inner = Builder->CreateOr(A, Op1); + Inner->takeName(Op0); + return BinaryOperator::CreateOr(Inner, C1); } return Changed ? &I : 0; } Instruction *InstCombiner::visitXor(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) { - if (isa<UndefValue>(Op0)) - // Handle undef ^ undef -> 0 special case. This is a common - // idiom (misuse). - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef - } + if (Value *V = SimplifyXorInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // (A&B)^(A&C) -> A&(B^C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); - // xor X, X = 0 - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; - if (I.getType()->isVectorTy()) - if (isa<ConstantAggregateZero>(Op1)) - return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X // Is this a ~ operation? 
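visitXor now leans on the shared SimplifyXorInst and distributive-law helpers instead of open-coding the undef, X^X, and zero-vector cases, so the special cases deleted below are subsumed. The distributive identity it picks up, spot-checked in plain C++ (illustrative):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; b += 3) {
      uint32_t c = b ^ 0x5a;  // arbitrary second operand
      assert(((a & b) ^ (a & c)) == (a & (b ^ c)));  // (A&B)^(A&C) == A&(B^C)
    }
  return 0;
}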
if (Value *NotOp = dyn_castNotVal(&I)) { @@ -1844,15 +2091,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return NV; } - if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 - if (X == Op1) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 - if (X == Op0) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); if (Op1I) { Value *A, *B; @@ -1865,10 +2103,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { I.swapOperands(); // Simplified below. std::swap(Op0, Op1); } - } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // A^(A^B) == B - } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { - return ReplaceInstUsesWith(I, A); // A^(B^A) == B } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){ if (A == Op0) { // A^(A&B) -> A^(B&A) @@ -1891,10 +2125,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); - } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // (A^B)^A == B - } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { - return ReplaceInstUsesWith(I, A); // (B^A)^A == B } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ if (A == Op1) // (A&B)^A -> (B&A)^A @@ -1932,29 +2162,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if ((A == C && B == D) || (A == D && B == C)) return BinaryOperator::CreateXor(A, B); } - - // (A & B)^(C & D) - if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && - match(Op0I, m_And(m_Value(A), m_Value(B))) && - match(Op1I, m_And(m_Value(C), m_Value(D)))) { - // (X & Y)^(X & Y) -> (Y^Z) & X - Value *X = 0, *Y = 0, *Z = 0; - if (A == C) - X = A, Y = B, Z = D; - else if (A == D) - X = A, Y = B, Z = C; - else if (B == C) - X = B, Y = A, Z = D; - else if (B == D) - X = B, Y = A, Z = C; - - if (X) { - Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); - return BinaryOperator::CreateAnd(NewOp, X); - } - } } - + // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0ebe3b4..8449f7b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -17,6 +17,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; /// getPromotedType - Return the specified type promoted as it would be to pass @@ -29,100 +30,10 @@ static const Type *getPromotedType(const Type *Ty) { return Ty; } -/// EnforceKnownAlignment - If the specified pointer points to an object that -/// we control, modify the object's alignment to PrefAlign. This isn't -/// often possible though. If alignment is important, a more reliable approach -/// is to simply align all global variables and allocation instructions to -/// their preferred alignment from the beginning. 
-/// -static unsigned EnforceKnownAlignment(Value *V, - unsigned Align, unsigned PrefAlign) { - - User *U = dyn_cast<User>(V); - if (!U) return Align; - - switch (Operator::getOpcode(U)) { - default: break; - case Instruction::BitCast: - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - case Instruction::GetElementPtr: { - // If all indexes are zero, it is just the alignment of the base pointer. - bool AllZeroOperands = true; - for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) - if (!isa<Constant>(*i) || - !cast<Constant>(*i)->isNullValue()) { - AllZeroOperands = false; - break; - } - - if (AllZeroOperands) { - // Treat this like a bitcast. - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - } - return Align; - } - case Instruction::Alloca: { - AllocaInst *AI = cast<AllocaInst>(V); - // If there is a requested alignment and if this is an alloca, round up. - if (AI->getAlignment() >= PrefAlign) - return AI->getAlignment(); - AI->setAlignment(PrefAlign); - return PrefAlign; - } - } - - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // If there is a large requested alignment and we can, bump up the alignment - // of the global. - if (GV->isDeclaration()) return Align; - - if (GV->getAlignment() >= PrefAlign) - return GV->getAlignment(); - // We can only increase the alignment of the global if it has no alignment - // specified or if it is not assigned a section. If it is assigned a - // section, the global could be densely packed with other objects in the - // section, increasing the alignment could cause padding issues. - if (!GV->hasSection() || GV->getAlignment() == 0) - GV->setAlignment(PrefAlign); - return GV->getAlignment(); - } - - return Align; -} - -/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that -/// we can determine, return it, otherwise return 0. If PrefAlign is specified, -/// and it is more than the alignment of the ultimate object, see if we can -/// increase the alignment of the ultimate object, making this check succeed. -unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign) { - assert(V->getType()->isPointerTy() && - "GetOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; - APInt Mask = APInt::getAllOnesValue(BitWidth); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne); - unsigned TrailZ = KnownZero.countTrailingOnes(); - - // Avoid trouble with rediculously large TrailZ values, such as - // those computed from a null pointer. - TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); - - // LLVM doesn't support alignments larger than this currently. - Align = std::min(Align, +Value::MaximumAlignment); - - if (PrefAlign > Align) - Align = EnforceKnownAlignment(V, Align, PrefAlign); - - // We don't need to make any adjustment. 
- return Align; -} Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1)); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -211,7 +122,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); + unsigned Alignment = getKnownAlignment(MI->getDest(), TD); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -234,7 +145,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. Value *Dest = MI->getDest(); - Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); + unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); + Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); + Dest = Builder->CreateBitCast(Dest, NewDstPtrTy); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; @@ -280,7 +193,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // memmove/cpy/set of zero bytes is a noop. if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { - if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); + if (NumBytes->isNullValue()) + return EraseInstFromFunction(CI); if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) if (CI->getZExtValue() == 1) { @@ -289,6 +203,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // alignment is sufficient. } } + + // No other transformations apply to volatile transfers. + if (MI->isVolatile()) + return 0; // If we have a memmove and the source operation is a constant global, // then the source and dest pointers can't alias, so we can change this @@ -332,82 +250,73 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (!TD) break; const Type *ReturnTy = CI.getType(); - bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); + uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL; // Get to the real allocated thing and offset as fast as possible. Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); - + + uint64_t Offset = 0; + uint64_t Size = -1ULL; + + // Try to look through constant GEPs. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) { + if (!GEP->hasAllConstantIndices()) break; + + // Get the current byte offset into the thing. Use the original + // operand in case we're looking through a bitcast. + SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); + Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), + Ops.data(), Ops.size()); + + Op1 = GEP->getPointerOperand()->stripPointerCasts(); + + // Make sure we're not a constant offset from an external + // global. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) + if (!GV->hasDefinitiveInitializer()) break; + } + // If we've stripped down to a single global variable that we // can know the size of then just return that. 
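The restructured objectsize lowering folds the constant-GEP case into a single Size/Offset computation. A small model of the arithmetic (hypothetical helper, not LLVM API) using the patch's DontKnow convention, 0 when the "min" flag is set and all-ones otherwise:

#include <cassert>
#include <cstdint>

static uint64_t objectSize(uint64_t size, uint64_t offset, bool minIsZero) {
  const uint64_t dontKnow = minIsZero ? 0 : ~0ULL;
  if (size == ~0ULL) return dontKnow;  // base object size unknown
  if (size < offset) return dontKnow;  // out-of-bounds or negative index
  return size - offset;
}

int main() {
  assert(objectSize(64, 16, true) == 48);       // 16 bytes into a 64-byte object
  assert(objectSize(64, 80, true) == 0);        // past the end: min mode says 0
  assert(objectSize(~0ULL, 0, false) == ~0ULL); // unknown base, max mode
  return 0;
}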
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { if (GV->hasDefinitiveInitializer()) { Constant *C = GV->getInitializer(); - uint64_t GlobalSize = TD->getTypeAllocSize(C->getType()); - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize)); + Size = TD->getTypeAllocSize(C->getType()); } else { // Can't determine size of the GV. - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow); return ReplaceInstUsesWith(CI, RetVal); } } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { // Get alloca size. if (AI->getAllocatedType()->isSized()) { - uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + Size = TD->getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize()); if (!C) break; - AllocaSize *= C->getZExtValue(); + Size *= C->getZExtValue(); } - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize)); } } else if (CallInst *MI = extractMallocCall(Op1)) { + // Get allocation size. const Type* MallocType = getMallocAllocatedType(MI); - // Get alloca size. - if (MallocType && MallocType->isSized()) { - if (Value *NElems = getMallocArraySize(MI, TD, true)) { + if (MallocType && MallocType->isSized()) + if (Value *NElems = getMallocArraySize(MI, TD, true)) if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, - (NElements->getZExtValue() * TD->getTypeAllocSize(MallocType)))); - } - } - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { - // Only handle constant GEPs here. - if (CE->getOpcode() != Instruction::GetElementPtr) break; - GEPOperator *GEP = cast<GEPOperator>(CE); - - // Make sure we're not a constant offset from an external - // global. - Value *Operand = GEP->getPointerOperand(); - Operand = Operand->stripPointerCasts(); - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) - if (!GV->hasDefinitiveInitializer()) break; - - // Get what we're pointing to and its size. - const PointerType *BaseType = - cast<PointerType>(Operand->getType()); - uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType()); - - // Get the current byte offset into the thing. Use the original - // operand in case we're looking through a bitcast. - SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end()); - const PointerType *OffsetType = - cast<PointerType>(GEP->getPointerOperand()->getType()); - uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size()); - - if (Size < Offset) { - // Out of bound reference? Negative index normalized to large - // index? Just return "I don't know". - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); - return ReplaceInstUsesWith(CI, RetVal); - } - - Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); - return ReplaceInstUsesWith(CI, RetVal); - } + Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType); + } // Do not return "I don't know" here. Later optimization passes could // make it possible to evaluate objectsize to a constant. - break; + if (Size == -1ULL) + break; + + if (Size < Offset) { + // Out of bound reference? Negative index normalized to large + // index? Just return "I don't know". 
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow)); + } + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset)); } case Intrinsic::bswap: // bswap(bswap(x)) -> x @@ -604,7 +513,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_loadu_dq: // Turn PPC lvx -> load if the pointer is known aligned. // Turn X86 loadups -> load if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); @@ -613,7 +522,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) { + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) { const Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); @@ -624,16 +533,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { const Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); return new StoreInst(II->getArgOperand(1), Ptr); } break; - - case Intrinsic::x86_sse_cvttss2si: { - // These intrinsics only demands the 0th element of its input vector. If + + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: { + // These intrinsics only demand the 0th element of their input vectors. If // we can simplify the input based on that, do so now. unsigned VWidth = cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); @@ -646,7 +562,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. 
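Extending the old cvttss2si case to the whole cvt*2si family works because each of these conversions reads only lane 0 of its vector operand, leaving the remaining lanes free for demanded-elements simplification. An x86-only illustration using the corresponding compiler intrinsic:

#include <cassert>
#include <xmmintrin.h>

int main() {
  // Lanes 3..0; only the low lane (42.7f) feeds the conversion.
  __m128 a = _mm_set_ps(99.0f, -8.5f, 1e30f, 42.7f);
  assert(_mm_cvttss_si32(a) == 42);  // truncating convert of lane 0 only
  return 0;
}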
if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { @@ -697,6 +613,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; + case Intrinsic::arm_neon_vld1: + case Intrinsic::arm_neon_vld2: + case Intrinsic::arm_neon_vld3: + case Intrinsic::arm_neon_vld4: + case Intrinsic::arm_neon_vld2lane: + case Intrinsic::arm_neon_vld3lane: + case Intrinsic::arm_neon_vld4lane: + case Intrinsic::arm_neon_vst1: + case Intrinsic::arm_neon_vst2: + case Intrinsic::arm_neon_vst3: + case Intrinsic::arm_neon_vst4: + case Intrinsic::arm_neon_vst2lane: + case Intrinsic::arm_neon_vst3lane: + case Intrinsic::arm_neon_vst4lane: { + unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD); + unsigned AlignArg = II->getNumArgOperands() - 1; + ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg)); + if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { + II->setArgOperand(AlignArg, + ConstantInt::get(Type::getInt32Ty(II->getContext()), + MemAlign, false)); + return II; + } + break; + } + case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. @@ -783,6 +725,8 @@ protected: NewInstruction = IC->ReplaceInstUsesWith(*CI, With); } bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) + return true; if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { if (SizeCI->isAllOnesValue()) @@ -819,11 +763,11 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { Instruction *InstCombiner::visitCallSite(CallSite CS) { bool Changed = false; - // If the callee is a constexpr cast of a function, attempt to move the cast - // to the arguments of the call/invoke. - if (transformConstExprCastCall(CS)) return 0; - + // If the callee is a pointer to a function, attempt to move any casts to the + // arguments of the call/invoke. Value *Callee = CS.getCalledValue(); + if (!isa<Function>(Callee) && transformConstExprCastCall(CS)) + return 0; if (Function *CalleeF = dyn_cast<Function>(Callee)) // If the call and callee calling conventions don't match, this call must @@ -917,12 +861,10 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // attempt to move the cast to the arguments of the call/invoke. // bool InstCombiner::transformConstExprCastCall(CallSite CS) { - if (!isa<ConstantExpr>(CS.getCalledValue())) return false; - ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue()); - if (CE->getOpcode() != Instruction::BitCast || - !isa<Function>(CE->getOperand(0))) + Function *Callee = + dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (Callee == 0) return false; - Function *Callee = cast<Function>(CE->getOperand(0)); Instruction *Caller = CS.getInstruction(); const AttrListPtr &CallerPAL = CS.getAttributes(); @@ -984,9 +926,22 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - if (CallerPAL.getParamAttributes(i + 1) - & Attribute::typeIncompatible(ParamTy)) + unsigned Attrs = CallerPAL.getParamAttributes(i + 1); + if (Attrs & Attribute::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. + + // If the parameter is passed as a byval argument, then we have to have a + // sized type and the sized type has to have the same size as the old type. 
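The byval rule added below can be read as a byte-count invariant: the callee copies getTypeAllocSize(pointee) bytes from the caller's argument, so rewriting a call through a pointer cast is only sound when the old and new pointee types allocate the same number of bytes. A trivial illustration of the invariant with hypothetical struct types:

#include <cstdint>

struct Old { uint32_t a, b; };  // 8 bytes
struct New { uint64_t v; };     // 8 bytes: same allocation size
static_assert(sizeof(Old) == sizeof(New),
              "a byval rewrite between these types would be rejected if "
              "their allocation sizes differed");

int main() { return 0; }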
+ if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) { + const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); + if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) + return false; + + const Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); + if (TD->getTypeAllocSize(CurElTy) != + TD->getTypeAllocSize(ParamPTy->getElementType())) + return false; + } // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. @@ -1109,8 +1064,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Value *NV = NC; if (OldRetTy != NV->getType() && !Caller->use_empty()) { if (!NV->getType()->isVoidTy()) { - Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, - OldRetTy, false); + Instruction::CastOps opcode = + CastInst::getCastOpcode(NC, false, OldRetTy, false); NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); // If this is an invoke instruction, we should insert it after the first @@ -1119,7 +1074,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI(); InsertNewInstBefore(NC, *I); } else { - // Otherwise, it's a call, just insert cast right after the call instr + // Otherwise, it's a call, just insert cast right after the call. InsertNewInstBefore(NC, *Caller); } Worklist.AddUsersToWorkList(*Caller); @@ -1128,7 +1083,6 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } } - if (!Caller->use_empty()) Caller->replaceAllUsesWith(NV); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 79a9b09..b432641 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -462,8 +462,8 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. Value *A = 0; ConstantInt *Cst = 0; - if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) && - Src->hasOneUse()) { + if (Src->hasOneUse() && + match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { // We have three types to worry about here, the type of A, the source of // the truncate (MidSize), and the destination of the truncate. We know that // ASize < MidSize and MidSize > ResultSize, but don't know the relation @@ -482,6 +482,16 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), false); } + + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest + // type isn't non-native. + if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) && + ShouldChangeType(Src->getType(), CI.getType()) && + match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) { + Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr"); + return BinaryOperator::CreateAnd(NewTrunc, + ConstantExpr::getTrunc(Cst, CI.getType())); + } return 0; } @@ -1019,8 +1029,22 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { } } } - - + + // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. 
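The vector fold noted above has a familiar scalar counterpart: on a two's-complement machine, an arithmetic right shift by bit-width minus one replicates the sign bit across the word. An illustrative check (C++20 guarantees arithmetic >> on signed values; earlier standards leave it implementation-defined, though it is arithmetic on common targets):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {INT32_MIN, -7, -1, 0, 1, 123456}) {
    int32_t splat = x >> 31;            // ashr x, 31
    assert(splat == (x < 0 ? -1 : 0));  // sign splat: all ones or all zeros
  }
  return 0;
}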
+ if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) { + ICmpInst::Predicate Pred; Value *CmpLHS; + if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) { + if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) { + const Type *EltTy = VTy->getElementType(); + + // splat the shift constant to a constant vector. + Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit"); + return ReplaceInstUsesWith(CI, In); + } + } + } + // If the input is a shl/ashr pair of a same constant, then this is a sign // extension from a smaller value. If we could trust arbitrary bitwidth // integers, we could turn this into a truncate to the smaller bit and then @@ -1363,8 +1387,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, ConstantInt::get(Int32Ty, SrcElts)); } - Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size()); - return new ShuffleVectorInst(InVal, V2, Mask); + return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask)); } static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d7e2b72..999de34 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -22,13 +22,17 @@ using namespace llvm; using namespace PatternMatch; +static ConstantInt *getOne(Constant *C) { + return ConstantInt::get(cast<IntegerType>(C->getType()), 1); +} + /// AddOne - Add one to a ConstantInt static Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C) { - return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); +static Constant *SubOne(Constant *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { @@ -160,8 +164,8 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, Max = KnownOne|UnknownBits; if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.set(Min.getBitWidth()-1); - Max.clear(Max.getBitWidth()-1); + Min.setBit(Min.getBitWidth()-1); + Max.clearBit(Max.getBitWidth()-1); } } @@ -694,13 +698,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, if (Pred == ICmpInst::ICMP_NE) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - // If this is an instruction (as opposed to constantexpr) get NUW/NSW info. - bool isNUW = false, isNSW = false; - if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) { - isNUW = Add->hasNoUnsignedWrap(); - isNSW = Add->hasNoSignedWrap(); - } - // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, // so the values can never be equal. Similiarly for all other "or equals" // operators. @@ -709,10 +706,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - // If this is an NUW add, then this is always false. 
- if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - Value *R = ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); return new ICmpInst(ICmpInst::ICMP_UGT, X, R); @@ -721,12 +714,8 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // (X+1) >u X --> X <u (0-1) --> X != 255 // (X+2) >u X --> X <u (0-2) --> X <u 254 // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 - if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { - // If this is an NUW add, then this is always true. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - } unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); ConstantInt *SMax = ConstantInt::get(X->getContext(), @@ -738,16 +727,8 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always false, if negative, this is always true. - if (isNSW) { - bool isTrue = CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - } // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 @@ -756,13 +737,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always true, if negative, this is always false. - if (isNSW) { - bool isTrue = !CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); @@ -782,7 +756,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even // (x /u C1) <u C2. Simply casting the operands and result won't // work. :( The if statement below tests that condition and bails - // if it finds it. + // if it finds it. bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) return 0; @@ -790,9 +764,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, return 0; // The ProdOV computation fails on divide by zero. if (DivIsSigned && DivRHS->isAllOnesValue()) return 0; // The overflow computation also screws up here - if (DivRHS->isOne()) - return 0; // Not worth bothering, and eliminates some funny cases - // with INT_MIN. + if (DivRHS->isOne()) { + // This eliminates some funny cases with INT_MIN. + ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. + return &ICI; + } // Compute Prod = CI * DivRHS. We are essentially solving an equation // of form X/C1=C2. 
We solve for X by multiplying C1 (DivRHS) and @@ -809,6 +785,10 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // Get the ICmp opcode ICmpInst::Predicate Pred = ICI.getPredicate(); + /// If the division is known to be exact, then there is no remainder from the + /// divide, so the covered range size is unit, otherwise it is the divisor. + ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; + // Figure out the interval that is being checked. For example, a comparison // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). // Compute this interval based on the constants involved and the signedness of @@ -818,38 +798,43 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; Constant *LoBound = 0, *HiBound = 0; - + if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) LoBound = Prod; HiOverflow = LoOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); + if (!HiOverflow) { + // If this is not an exact divide, then many values in the range collapse + // to the same result value. + HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); + } + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; + LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); + HiBound = RangeSize; } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) HiOverflow = LoOverflow = ProdOV; if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); + HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); } else { // (X / pos) op neg // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) HiBound = AddOne(Prod); LoOverflow = HiOverflow = ProdOV ? -1 : 0; if (!LoOverflow) { - ConstantInt* DivNeg = - cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); + ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; - } + } } } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. + if (DivI->isExact()) + RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (CmpRHSV == 0) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(DivRHS); - HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); + LoBound = AddOne(RangeSize); + HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN @@ -859,12 +844,12 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, HiBound = AddOne(Prod); HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0; + LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; } else { // (X / neg) op neg LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) LoOverflow = HiOverflow = ProdOV; if (!HiOverflow) - HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true); + HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); } // Dividing by a negative swaps the condition. 
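[Editor's note: a standalone sketch — illustration only, not LLVM code — of the interval reasoning described here: "X /u 5 == 3" is the range test [15, 20), i.e. the covered range size is the divisor, and an exact udiv (no remainder possible) collapses it to a single value:]

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x <= 255; ++x) {
    uint8_t X = uint8_t(x);
    // X /u 5 == 3  <=>  X in [Prod, Prod + RangeSize) == [15, 20).
    assert(((X / 5) == 3) == (X >= 15 && X < 20));
    // When X is known divisible by 5 ("exact"), only Prod itself matches.
    if (X % 5 == 0)
      assert(((X / 5) == 3) == (X == 15));
  }
  return 0;
}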
LT <-> GT @@ -883,9 +868,8 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); - return ReplaceInstUsesWith(ICI, - InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - true)); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); @@ -908,13 +892,100 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow == -1) // High bound less than input range. + if (HiOverflow == -1) // High bound less than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + } +} + +/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". +Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, + ConstantInt *ShAmt) { + const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + uint32_t TypeBits = CmpRHSV.getBitWidth(); + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + if (ShAmtVal >= TypeBits || ShAmtVal == 0) + return 0; + + if (!ICI.isEquality()) { + // If we have an unsigned comparison and an ashr, we can't simplify this. + // Similarly for signed comparisons with lshr. + if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) + return 0; + + // Otherwise, all lshr and all exact ashr's are equivalent to a udiv/sdiv by + // a power of 2. Since we already have logic to simplify these, transform + // to div and then simplify the resultant comparison. + if (Shr->getOpcode() == Instruction::AShr && + !Shr->isExact()) + return 0; + + // Revisit the shift (to delete it). + Worklist.Add(Shr); + + Constant *DivCst = + ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); + + Value *Tmp = + Shr->getOpcode() == Instruction::AShr ? + Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : + Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); + + ICI.setOperand(0, Tmp); + + // If the builder folded the binop, just return it. + BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); + if (TheDiv == 0) + return &ICI; + + // Otherwise, fold this div/compare. + assert(TheDiv->getOpcode() == Instruction::SDiv || + TheDiv->getOpcode() == Instruction::UDiv); + + Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); + assert(Res && "This div/cst should have folded!"); + return Res; + } + + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = CmpRHSV << ShAmtVal; + ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp); + if (Shr->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. 
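[Editor's note: the new FoldICmpShrCst helper introduced below leans on the fact that an lshr by a constant is a udiv by a power of two (and an exact ashr is an sdiv); a standalone sketch — illustration only, not LLVM code — of the unsigned case:]

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x <= 255; ++x) {
    uint8_t X = uint8_t(x);
    // lshr X, 3 computes exactly udiv X, 8, so the icmp can be
    // rewritten against the div and folded by FoldICmpDivCst.
    assert(uint8_t(X >> 3) == uint8_t(X / 8));
  }
  return 0;
}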
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + // Otherwise, check to see if the bits shifted out are known to be zero. + // If so, we can compare against the unshifted value: + // (X & 4) >> 1 == 2 --> (X & 4) == 4. + if (Shr->hasOneUse() && Shr->isExact()) + return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); + + if (Shr->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = ConstantInt::get(ICI.getContext(), Val); + + Value *And = Builder->CreateAnd(Shr->getOperand(0), + Mask, Shr->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); } + return 0; } @@ -939,8 +1010,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { // Pull in the high bits from known-ones set. - APInt NewRHS(RHS->getValue()); - NewRHS.zext(SrcBits); + APInt NewRHS = RHS->getValue().zext(SrcBits); NewRHS |= KnownOne; return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantInt::get(ICI.getContext(), NewRHS)); @@ -1022,10 +1092,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { uint32_t BitWidth = cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth(); - APInt NewCST = AndCST->getValue(); - NewCST.zext(BitWidth); - APInt NewCI = RHSV; - NewCI.zext(BitWidth); + APInt NewCST = AndCST->getValue().zext(BitWidth); + APInt NewCI = RHSV.zext(BitWidth); Value *NewAnd = Builder->CreateAnd(Cast->getOperand(0), ConstantInt::get(ICI.getContext(), NewCST), @@ -1145,7 +1213,6 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 // -> and (icmp eq P, null), (icmp eq Q, null). - Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, Constant::getNullValue(P->getType())); Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, @@ -1185,6 +1252,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return ReplaceInstUsesWith(ICI, Cst); } + // If the shift is NUW, then it is just shifting out zeros, no need for an + // AND. + if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. 
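[Editor's note: a standalone sketch — illustration only, not LLVM code — of the strength reduction used here for equality compares of a shift-by-constant: once the shifted-out bits are known compatible, the shift becomes a mask test:]

#include <cassert>
#include <cstdint>

int main() {
  // C is kept small enough that (C << 3) shifts nothing out; the code
  // above rejects the other case as an always-false/always-true compare.
  for (unsigned c = 0; c <= 31; ++c)
    for (unsigned x = 0; x <= 255; ++x) {
      uint8_t X = uint8_t(x), C = uint8_t(c);
      // (X >>u 3) == C  <=>  (X & 0xF8) == (C << 3)
      assert(((X >> 3) == C) == ((X & 0xF8) == uint8_t(C << 3)));
    }
  return 0;
}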
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); @@ -1195,8 +1268,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Value *And = Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, - ConstantInt::get(ICI.getContext(), - RHSV.lshr(ShAmtVal))); + ConstantExpr::getLShr(RHS, ShAmt)); } } @@ -1205,8 +1277,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (LHSI->hasOneUse() && isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { // (X << 31) <s 0 --> (X&1) != 0 - Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) << - (TypeBits-ShAmt->getZExtValue()-1)); + Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), + APInt::getOneBitSet(TypeBits, + TypeBits-ShAmt->getZExtValue()-1)); Value *And = Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, @@ -1216,57 +1289,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: { + case Instruction::AShr: // Only handle equality comparisons of shift-by-constant. - ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); - if (!ShAmt || !ICI.isEquality()) break; - - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = RHSV.getBitWidth(); - if (ShAmt->uge(TypeBits)) - break; - - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - APInt Comp = RHSV << ShAmtVal; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = Comp.lshr(ShAmtVal); - else - Comp = Comp.ashr(ShAmtVal); - - if (Comp != RHSV) { // Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), - IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - // Otherwise, check to see if the bits shifted out are known to be zero. - // If so, we can compare against the unshifted value: - // (X & 4) >> 1 == 2 --> (X & 4) == 4. - if (LHSI->hasOneUse() && - MaskedValueIsZero(LHSI->getOperand(0), - APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantExpr::getShl(RHS, ShAmt)); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(ICI.getContext(), Val); - - Value *And = Builder->CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - return new ICmpInst(ICI.getPredicate(), And, - ConstantExpr::getShl(RHS, ShAmt)); - } + if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) + if (Instruction *Res = FoldICmpShrCst(ICI, cast<BinaryOperator>(LHSI), + ShAmt)) + return Res; break; - } case Instruction::SDiv: case Instruction::UDiv: @@ -1543,50 +1572,174 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // The re-extended constant changed so the constant cannot be represented // in the shorter type. Consequently, we cannot emit a simple comparison. 
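[Editor's note: the rewritten sign-bit check above folds "(X << s) <s 0" into a single-bit test; a standalone sketch — illustration only, not LLVM code, assuming two's-complement conversions — for s = 5 on i8:]

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x <= 255; ++x) {
    uint8_t X = uint8_t(x);
    // After "X << 5", the lane's sign bit is bit 7-5 = 2 of X,
    // so the signed compare is just a mask test on that bit.
    bool shiftedNeg = static_cast<int8_t>(uint8_t(X << 5)) < 0;
    bool bitSet = (X & (1u << 2)) != 0;
    assert(shiftedNeg == bitSet);
  }
  return 0;
}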
+ // All the cases that fold to true or false will have already been handled + // by SimplifyICmpInst, so only deal with the tricky case. - // First, handle some easy cases. We know the result cannot be equal at this - // point so handle the ICI.isEquality() cases - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); + if (isSignedCmp || !isSignedExt) + return 0; // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. - Value *Result; - if (isSignedCmp) { - // We're performing a signed comparison. - if (cast<ConstantInt>(CI)->getValue().isNegative()) - Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false - else - Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true - } else { - // We're performing an unsigned comparison. - if (isSignedExt) { - // We're performing an unsigned comp with a sign extended value. - // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); - } else { - // Unsigned extend & unsigned compare -> always true. - Result = ConstantInt::getTrue(ICI.getContext()); - } - } + + // We're performing an unsigned comp with a sign extended value. + // This is true if the input is >= 0. [aka >s -1] + Constant *NegOne = Constant::getAllOnesValue(SrcTy); + Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); // Finally, return the value computed. - if (ICI.getPredicate() == ICmpInst::ICMP_ULT || - ICI.getPredicate() == ICmpInst::ICMP_SLT) + if (ICI.getPredicate() == ICmpInst::ICMP_ULT) return ReplaceInstUsesWith(ICI, Result); - assert((ICI.getPredicate()==ICmpInst::ICMP_UGT || - ICI.getPredicate()==ICmpInst::ICMP_SGT) && - "ICmp should be folded!"); - if (Constant *CI = dyn_cast<Constant>(Result)) - return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); + assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); return BinaryOperator::CreateNot(Result); } +/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: +/// I = icmp ugt (add (add A, B), CI2), CI1 +/// If this is of the form: +/// sum = a + b +/// if (sum+128 >u 255) +/// Then replace it with llvm.sadd.with.overflow.i8. +/// +static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, + ConstantInt *CI2, ConstantInt *CI1, + InstCombiner &IC) { + // The transformation we're trying to do here is to transform this into an + // llvm.sadd.with.overflow. To do this, we have to replace the original add + // with a narrower add, and discard the add-with-constant that is part of the + // range check (if we can't eliminate it, this isn't profitable). + + // In order to eliminate the add-with-constant, the compare can be its only + // use. + Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); + if (!AddWithCst->hasOneUse()) return 0; + + // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. + if (!CI2->getValue().isPowerOf2()) return 0; + unsigned NewWidth = CI2->getValue().countTrailingZeros(); + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + + // The width of the new add formed is 1 more than the bias. + ++NewWidth; + + // Check to see that CI1 is an all-ones value with NewWidth bits. 
+ if (CI1->getBitWidth() == NewWidth || + CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) + return 0; + + // In order to replace the original add with a narrower + // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant + // and truncates that discard the high bits of the add. Verify that this is + // the case. + Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); + for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end(); + UI != E; ++UI) { + if (*UI == AddWithCst) continue; + + // Only accept truncates for now. We would really like a nice recursive + // predicate like SimplifyDemandedBits, but which goes downwards the use-def + // chain to see which bits of a value are actually demanded. If the + // original add had another add which was then immediately truncated, we + // could still do the transformation. + TruncInst *TI = dyn_cast<TruncInst>(*UI); + if (TI == 0 || + TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + } + + // If the pattern matches, truncate the inputs to the narrower type and + // use the sadd_with_overflow intrinsic to efficiently compute both the + // result and the overflow bit. + Module *M = I.getParent()->getParent()->getParent(); + + const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, + &NewType, 1); + + InstCombiner::BuilderTy *Builder = IC.Builder; + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + Builder->SetInsertPoint(OrigAdd); + + Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); + Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); + CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); + Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + + // The inner add was the result of the narrow add, zero extended to the + // wider type. Replace it with the result computed by the intrinsic. + IC.ReplaceInstUsesWith(*OrigAdd, ZExt); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "sadd.overflow"); +} + +static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, + InstCombiner &IC) { + // Don't bother doing this transformation for pointers, don't do it for + // vectors. + if (!isa<IntegerType>(OrigAddV->getType())) return 0; + + // If the add is a constant expr, then we don't bother transforming it. + Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); + if (OrigAdd == 0) return 0; + + Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + InstCombiner::BuilderTy *Builder = IC.Builder; + Builder->SetInsertPoint(OrigAdd); + + Module *M = I.getParent()->getParent()->getParent(); + const Type *Ty = LHS->getType(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1); + CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); + Value *Add = Builder->CreateExtractValue(Call, 0); + IC.ReplaceInstUsesWith(*OrigAdd, Add); + + // The original icmp gets replaced with the overflow value. 
+ return ExtractValueInst::Create(Call, 1, "uadd.overflow"); +} + +// DemandedBitsLHSMask - When performing a comparison against a constant, +// it is possible that not all the bits in the LHS are demanded. This helper +// method computes the mask that IS demanded. +static APInt DemandedBitsLHSMask(ICmpInst &I, + unsigned BitWidth, bool isSignCheck) { + if (isSignCheck) + return APInt::getSignBit(BitWidth); + + ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); + if (!CI) return APInt::getAllOnesValue(BitWidth); + const APInt &RHS = CI->getValue(); + + switch (I.getPredicate()) { + // For a UGT comparison, we don't care about any bits that + // correspond to the trailing ones of the comparand. The value of these + // bits doesn't impact the outcome of the comparison, because any value + // greater than the RHS must differ in a bit higher than these due to carry. + case ICmpInst::ICMP_UGT: { + unsigned trailingOnes = RHS.countTrailingOnes(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); + return ~lowBitsSet; + } + + // Similarly, for a ULT comparison, we don't care about the trailing zeros. + // Any value less than the RHS must differ in a higher bit because of carries. + case ICmpInst::ICMP_ULT: { + unsigned trailingZeros = RHS.countTrailingZeros(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); + return ~lowBitsSet; + } + + default: + return APInt::getAllOnesValue(BitWidth); + } + +} Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; @@ -1649,17 +1802,37 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } unsigned BitWidth = 0; - if (TD) - BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVectorTy()) + if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); - + else if (TD) // Pointers require TD info to get their size. + BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); + bool isSignBit = false; // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { Value *A = 0, *B = 0; + // Match the following pattern, which is a common idiom when writing + // overflow-safe integer arithmetic function. The source performs an + // addition in wider type, and explicitly checks for overflow using + // comparisons against INT_MIN and INT_MAX. Simplify this by using the + // sadd_with_overflow intrinsic. + // + // TODO: This could probably be generalized to handle other overflow-safe + // operations if we worked out the formulas to compute the appropriate + // magic constants. + // + // sum = a + b + // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 + { + ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) + if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) + return Res; + } + // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) if (I.isEquality() && CI->isZero() && match(Op0, m_Sub(m_Value(A), m_Value(B)))) { @@ -1704,8 +1877,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); if (SimplifyDemandedBits(I.getOperandUse(0), - isSignBit ? 
APInt::getSignBit(BitWidth) - : APInt::getAllOnesValue(BitWidth), + DemandedBitsLHSMask(I, BitWidth, isSignBit), Op0KnownZero, Op0KnownOne, 0)) return &I; if (SimplifyDemandedBits(I.getOperandUse(1), @@ -1744,14 +1916,80 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // simplify this comparison. For example, (x&4) < 8 is always true. switch (I.getPredicate()) { default: llvm_unreachable("Unknown icmp opcode!"); - case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + + // If all bits are known zero except for one, then we know at most one + // bit is set. If the comparison is against zero, then this is a check + // to see if *that* bit is set. + APInt Op0KnownZeroInverted = ~Op0KnownZero; + if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + // If the LHS is an AND with the same constant, look through it. + Value *LHS = 0; + ConstantInt *LHSC = 0; + if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || + LHSC->getValue() != Op0KnownZeroInverted) + LHS = Op0; + + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, + // then turn "((1 << x)&8) == 0" into "x != 3". + Value *X = 0; + if (match(LHS, m_Shl(m_One(), m_Value(X)))) { + unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), CmpVal)); + } + + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, + // then turn "((8 >>u x)&1) == 0" into "x != 3". + const APInt *CI; + if (Op0KnownZeroInverted == 1 && + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); + } + break; - case ICmpInst::ICMP_NE: + } + case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + + // If all bits are known zero except for one, then we know at most one + // bit is set. If the comparison is against zero, then this is a check + // to see if *that* bit is set. + APInt Op0KnownZeroInverted = ~Op0KnownZero; + if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + // If the LHS is an AND with the same constant, look through it. + Value *LHS = 0; + ConstantInt *LHSC = 0; + if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || + LHSC->getValue() != Op0KnownZeroInverted) + LHS = Op0; + + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, + // then turn "((1 << x)&8) != 0" into "x == 3". + Value *X = 0; + if (match(LHS, m_Shl(m_One(), m_Value(X)))) { + unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), CmpVal)); + } + + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, + // then turn "((8 >>u x)&1) != 0" into "x == 3". + const APInt *CI; + if (Op0KnownZeroInverted == 1 && + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); + } + break; + } case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); @@ -1894,7 +2132,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // block. If in the same block, we're encouraging jump threading. 
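[Editor's note: a standalone sketch — illustration only, not LLVM code — of the two new single-bit folds added to the ICMP_EQ/ICMP_NE cases above; when at most one bit of the LHS can be set, comparing against zero tests that bit's position:]

#include <cassert>

int main() {
  for (unsigned x = 0; x < 8; ++x) {
    // ((1 << x) & 8) == 0  -->  x != 3
    assert((((1u << x) & 8u) == 0) == (x != 3));
    // ((8 >>u x) & 1) == 0  -->  x != 3
    assert((((8u >> x) & 1u) == 0) == (x != 3));
  }
  return 0;
}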
If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) + if (Instruction *NV = FoldOpIntoPhi(I)) return NV; break; case Instruction::Select: { @@ -1995,79 +2233,163 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *R = visitICmpInstWithCastAndCast(I)) return R; } - - // See if it's the same type of instruction on the left and right. - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() && - Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) { - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b - return new ICmpInst(I.getPredicate(), Op0I->getOperand(0), - Op1I->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - if (CI->getValue().isSignBit()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - - if (CI->getValue().isMaxSignedValue()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - Pred = I.getSwappedPredicate(Pred); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } + + // Special logic for binary operators. + BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); + BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); + if (BO0 || BO1) { + CmpInst::Predicate Pred = I.getPredicate(); + bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; + if (BO0 && isa<OverflowingBinaryOperator>(BO0)) + NoOp0WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); + if (BO1 && isa<OverflowingBinaryOperator>(BO1)) + NoOp1WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); + + // Analyze the case when either Op0 or Op1 is an add instruction. + // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). + Value *A = 0, *B = 0, *C = 0, *D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Add) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Add) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + if ((A == Op1 || B == Op1) && NoOp0WrapProblem) + return new ICmpInst(Pred, A == Op1 ? B : A, + Constant::getNullValue(Op1->getType())); + + // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + if ((C == Op0 || D == Op0) && NoOp1WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), + C == Op0 ? D : C); + + // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. + if (A && C && (A == C || A == D || B == C || B == D) && + NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) { + // Determine Y and Z in the form icmp (X+Y), (X+Z). + Value *Y = (A == C || A == D) ? B : A; + Value *Z = (C == A || C == B) ? 
D : C; + return new ICmpInst(Pred, Y, Z); + } + + // Analyze the case when either Op0 or Op1 is a sub instruction. + // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). + A = 0; B = 0; C = 0; D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Sub) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Sub) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. + if (A == Op1 && NoOp0WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); + + // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. + if (C == Op0 && NoOp1WrapProblem) + return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); + + // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. + if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, A, C); + + // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. + if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, D, B); + + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && + BO0->hasOneUse() && BO1->hasOneUse() && + BO0->getOperand(1) == BO1->getOperand(1)) { + switch (BO0->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { + if (CI->getValue().isSignBit()) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); + } + + if (CI->getValue().isMaxSignedValue()) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + Pred = I.getSwappedPredicate(Pred); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); } + } + break; + case Instruction::Mul: + if (!I.isEquality()) break; - case Instruction::Mul: - if (!I.isEquality()) - break; - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(I.getContext(), - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - - AP.countTrailingZeros())); - Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); - return new ICmpInst(I.getPredicate(), And1, And2); - } + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { + // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask + // Mask = -1 >> count-trailing-zeros(Cst). 
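[Editor's note: a standalone sketch — illustration only, not LLVM code — of the masked-compare rewrite whose comment closes the hunk above: in "a*C ==/!= b*C" only the bits that survive the truncating multiply matter, so with C = 12 (two trailing zeros) the mask is -1 >>u 2 = 0x3F:]

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 12;           // count-trailing-zeros(12) == 2
  const uint8_t Mask = 0xFF >> 2; // -1 >>u ctz(C)
  for (unsigned a = 0; a <= 255; ++a)
    for (unsigned b = 0; b <= 255; ++b) {
      uint8_t A = uint8_t(a), B = uint8_t(b);
      bool prodEq = uint8_t(A * C) == uint8_t(B * C);
      bool maskEq = (A & Mask) == (B & Mask);
      assert(prodEq == maskEq);
    }
  return 0;
}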
+ if (!CI->isZero() && !CI->isOne()) { + const APInt &AP = CI->getValue(); + ConstantInt *Mask = ConstantInt::get(I.getContext(), + APInt::getLowBitsSet(AP.getBitWidth(), + AP.getBitWidth() - + AP.countTrailingZeros())); + Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + return new ICmpInst(I.getPredicate(), And1, And2); } - break; } + break; } } } - // ~x < ~y --> y < x { Value *A, *B; - if (match(Op0, m_Not(m_Value(A))) && - match(Op1, m_Not(m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, A); + // ~x < ~y --> y < x + // ~x < cst --> ~cst < x + if (match(Op0, m_Not(m_Value(A)))) { + if (match(Op1, m_Not(m_Value(B)))) + return new ICmpInst(I.getPredicate(), B, A); + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) + return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); + } + + // (a+b) <u a --> llvm.uadd.with.overflow. + // (a+b) <u b --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_ULT && + match(Op0, m_Add(m_Value(A), m_Value(B))) && + (Op1 == A || Op1 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) + return R; + + // a >u (a+b) --> llvm.uadd.with.overflow. + // b >u (a+b) --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op1, m_Add(m_Value(A), m_Value(B))) && + (Op0 == A || Op0 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) + return R; } if (I.isEquality()) { Value *A, *B, *C, *D; - - // -x == -y --> x == y - if (match(Op0, m_Neg(m_Value(A))) && - match(Op1, m_Neg(m_Value(B)))) - return new ICmpInst(I.getPredicate(), A, B); - + if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 Value *OtherVal = A == Op1 ? B : A; @@ -2102,16 +2424,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Constant::getNullValue(A->getType())); } - // (A-B) == A -> B == 0 - if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - - // A == (A-B) -> B == 0 - if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 if (Op0->hasOneUse() && Op1->hasOneUse() && match(Op0, m_And(m_Value(A), m_Value(B))) && @@ -2397,7 +2709,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) + if (Instruction *NV = FoldOpIntoPhi(I)) return NV; break; case Instruction::SIToFP: diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index b68fbc2..78ff734 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -145,7 +145,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = - GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); + getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD); unsigned LoadAlign = LI.getAlignment(); unsigned EffectiveLoadAlign = LoadAlign != 0 ? 
LoadAlign : TD->getABITypeAlignment(LI.getType()); @@ -165,7 +165,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (LI.isVolatile()) return 0; // Do really simple store-to-load forwarding and load CSE, to catch cases - // where there are several consequtive memory accesses to the same location, + // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. BasicBlock::iterator BBI = &LI; if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) @@ -330,7 +330,9 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); - return new StoreInst(NewCast, CastOp); + SI.setOperand(0, NewCast); + SI.setOperand(1, CastOp); + return &SI; } /// equivalentAddressValues - Test if A and B will obviously have the same @@ -414,7 +416,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = - GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); + getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()), + TD); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : TD->getABITypeAlignment(Val->getType()); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index b3974e8..d1a1fd6 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -14,26 +14,22 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; -/// SubOne - Subtract one from a ConstantInt. -static Constant *SubOne(ConstantInt *C) { - return ConstantInt::get(C->getContext(), C->getValue()-1); -} - /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { uint32_t W = C1->getBitWidth(); APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); if (sign) { - LHSExt.sext(W * 2); - RHSExt.sext(W * 2); + LHSExt = LHSExt.sext(W * 2); + RHSExt = RHSExt.sext(W * 2); } else { - LHSExt.zext(W * 2); - RHSExt.zext(W * 2); + LHSExt = LHSExt.zext(W * 2); + RHSExt = RHSExt.zext(W * 2); } APInt MulExt = LHSExt * RHSExt; @@ -47,62 +43,48 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { } Instruction *InstCombiner::visitMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) // undef * X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + if (Value *V = SimplifyMulInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // Simplify mul instructions with a constant RHS. 
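[Editor's note: the updated MultiplyOverflows earlier in this hunk now uses the value-returning APInt::sext/zext; a standalone sketch — illustration only, not LLVM code — of the underlying widen-multiply-and-round-trip overflow test, for the unsigned i8 case:]

#include <cassert>
#include <cstdint>

// Extend to twice the width, multiply there (cannot overflow), and report
// overflow when the product does not round-trip through the narrow type.
static bool multiplyOverflowsU8(uint8_t a, uint8_t b) {
  uint16_t wide = uint16_t(a) * uint16_t(b);
  return uint16_t(uint8_t(wide)) != wide;
}

int main() {
  for (unsigned a = 0; a <= 255; ++a)
    for (unsigned b = 0; b <= 255; ++b)
      assert(multiplyOverflowsU8(uint8_t(a), uint8_t(b)) == (a * b > 255u));
  return 0;
}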
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) { - - // ((X << C1)*C2) == (X * (C2 << C1)) - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) - if (SI->getOpcode() == Instruction::Shl) - if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) - return BinaryOperator::CreateMul(SI->getOperand(0), - ConstantExpr::getShl(CI, ShOp)); - - if (CI->isZero()) - return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 - if (CI->equalsInt(1)) // X * 1 == X - return ReplaceInstUsesWith(I, Op0); - if (CI->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0, I.getName()); - - const APInt& Val = cast<ConstantInt>(CI)->getValue(); - if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C - return BinaryOperator::CreateShl(Op0, - ConstantInt::get(Op0->getType(), Val.logBase2())); - } - } else if (Op1C->getType()->isVectorTy()) { - if (Op1C->isNullValue()) - return ReplaceInstUsesWith(I, Op1C); - - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { - if (Op1V->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0, I.getName()); + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat)) - if (CI->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - } - } + if (match(Op1, m_AllOnes())) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + + // ((X << C1)*C2) == (X * (C2 << C1)) + if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) + if (SI->getOpcode() == Instruction::Shl) + if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) + return BinaryOperator::CreateMul(SI->getOperand(0), + ConstantExpr::getShl(CI, ShOp)); + + const APInt &Val = CI->getValue(); + if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C + Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2()); + BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst); + if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap(); + if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); + return Shl; } - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) - if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && - isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) { - // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. - Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); - Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); - return BinaryOperator::CreateAdd(Add, C1C2); - + // Canonicalize (X+C1)*CI -> X*CI+C1*CI. + { Value *X; ConstantInt *C1; + if (Op0->hasOneUse() && + match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) { + Value *Add = Builder->CreateMul(X, CI, "tmp"); + return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI)); } - + } + } + + // Simplify mul instructions with a constant RHS. + if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -135,8 +117,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); - // If the division is exact, X % Y is zero. 
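[Editor's note: the rewritten power-of-two case in visitMul just above emits a shift and re-applies the nsw/nuw flags; a standalone sketch — illustration only, not LLVM code — of the wrap-for-wrap equivalence it relies on:]

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x <= 255; ++x) {
    uint8_t X = uint8_t(x);
    // X * 8 and X << 3 wrap identically, so the mul can become a shl
    // and keep its no-wrap flags.
    assert(uint8_t(X * 8) == uint8_t(X << 3));
  }
  return 0;
}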
- if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) + // If the division is exact, X % Y is zero, so we end up with X or -X. + if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); @@ -194,7 +176,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } Instruction *InstCombiner::visitFMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Simplify mul instructions with a constant RHS... @@ -304,28 +286,6 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { } -/// This function implements the transforms on div instructions that work -/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is -/// used by the visitors to those instructions. -/// @brief Transforms common to all three div instructions -Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // undef / X -> 0 for integer. - // undef / X -> undef for FP (the undef could be a snan). - if (isa<UndefValue>(Op0)) { - if (Op0->getType()->isFPOrFPVectorTy()) - return ReplaceInstUsesWith(I, Op0); - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // X / undef -> undef - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); - - return 0; -} - /// This function implements the transforms common to both integer division /// instructions (udiv and sdiv). It is called by the visitors to those integer /// division instructions. @@ -333,31 +293,12 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // (sdiv X, X) --> 1 (udiv X, X) --> 1 - if (Op0 == Op1) { - if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { - Constant *CI = ConstantInt::get(Ty->getElementType(), 1); - std::vector<Constant*> Elts(Ty->getNumElements(), CI); - return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); - } - - Constant *CI = ConstantInt::get(I.getType(), 1); - return ReplaceInstUsesWith(I, CI); - } - - if (Instruction *Common = commonDivTransforms(I)) - return Common; - // Handle cases involving: [su]div X, (select Cond, Y, Z) // This does not apply for fdiv. if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) return &I; if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // div X, 1 == X - if (RHS->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - // (X / C1) / C2 -> X / (C1*C2) if (Instruction *LHS = dyn_cast<Instruction>(Op0)) if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) @@ -365,9 +306,8 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv)) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - else - return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - ConstantExpr::getMul(RHS, LHSRHS)); + return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), + ConstantExpr::getMul(RHS, LHSRHS)); } if (!RHS->isZero()) { // avoid X udiv 0 @@ -380,20 +320,13 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } - // 0 / X == 0, we don't need to preserve faults! 
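[Editor's note: a standalone sketch — illustration only, not LLVM code — of the "(X / C1) / C2 --> X / (C1*C2)" fold retained in commonIDivTransforms above; MultiplyOverflows guards the case where C1*C2 does not fit, which folds to zero instead:]

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x <= 255; ++x) {
    uint8_t X = uint8_t(x);
    // 3 * 5 = 15 fits in i8, so the nested udivs collapse.
    assert(uint8_t(uint8_t(X / 3) / 5) == uint8_t(X / 15));
  }
  return 0;
}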
- if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0)) - if (LHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // It can't be division by zero, hence it must be division by one. - if (I.getType()->isIntegerTy(1)) - return ReplaceInstUsesWith(I, Op0); - - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { - if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue())) - // div X, 1 == X - if (X->isOne()) - return ReplaceInstUsesWith(I, Op0); + // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y + Value *X = 0, *Z = 0; + if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 + bool isSigned = I.getOpcode() == Instruction::SDiv; + if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || + (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1))))) + return BinaryOperator::Create(I.getOpcode(), X, Op1); } return 0; @@ -402,6 +335,9 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyUDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; @@ -410,60 +346,59 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. - if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 - return BinaryOperator::CreateLShr(Op0, + if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2 + BinaryOperator *LShr = + BinaryOperator::CreateLShr(Op0, ConstantInt::get(Op0->getType(), C->getValue().logBase2())); + if (I.isExact()) LShr->setIsExact(); + return LShr; + } // X udiv C, where C >= signbit if (C->getValue().isNegative()) { - Value *IC = Builder->CreateICmpULT( Op0, C); + Value *IC = Builder->CreateICmpULT(Op0, C); return SelectInst::Create(IC, Constant::getNullValue(I.getType()), ConstantInt::get(I.getType(), 1)); } } // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) - if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) { - if (RHSI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(RHSI->getOperand(0))) { - const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue(); - if (C1.isPowerOf2()) { - Value *N = RHSI->getOperand(1); - const Type *NTy = N->getType(); - if (uint32_t C2 = C1.logBase2()) - N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp"); - return BinaryOperator::CreateLShr(Op0, N); - } + { const APInt *CI; Value *N; + if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) { + if (*CI != 1) + N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()), + "tmp"); + if (I.isExact()) + return BinaryOperator::CreateExactLShr(Op0, N); + return BinaryOperator::CreateLShr(Op0, N); } } // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) // where C1&C2 are powers of two. 
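[Editor's note: a standalone sketch — illustration only, not LLVM code — covering two rewrites in this hunk: the new "(X - (X urem Y)) / Y --> X / Y" fold and the udiv-by-power-of-two-to-lshr conversion:]

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x <= 255; ++x) {
    uint8_t X = uint8_t(x);
    // X udiv 8 --> X >>u 3
    assert(uint8_t(X / 8) == uint8_t(X >> 3));
    for (unsigned y = 1; y <= 255; ++y) {
      uint8_t Y = uint8_t(y);
      // (X - (X urem Y)) / Y == X / Y; the subtraction rounds X down
      // to a multiple of Y, which the division discards anyway.
      assert(uint8_t((X - X % Y) / Y) == uint8_t(X / Y));
    }
  }
  return 0;
}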
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) - if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { - const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); - if (TVA.isPowerOf2() && FVA.isPowerOf2()) { - // Compute the shift amounts - uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); - // Construct the "on true" case of the select - Constant *TC = ConstantInt::get(Op0->getType(), TSA); - Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); + { Value *Cond; const APInt *C1, *C2; + if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) { + // Construct the "on true" case of the select + Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t", + I.isExact()); - // Construct the "on false" case of the select - Constant *FC = ConstantInt::get(Op0->getType(), FSA); - Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); - - // construct the select instruction and return it. - return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); - } - } + // Construct the "on false" case of the select + Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f", + I.isExact()); + + // construct the select instruction and return it. + return SelectInst::Create(Cond, TSI, FSI); + } + } return 0; } Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifySDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; @@ -473,20 +408,17 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (RHS->isAllOnesValue()) return BinaryOperator::CreateNeg(Op0); - // sdiv X, C --> ashr X, log2(C) - if (cast<SDivOperator>(&I)->isExact() && - RHS->getValue().isNonNegative() && + // sdiv X, C --> ashr exact X, log2(C) + if (I.isExact() && RHS->getValue().isNonNegative() && RHS->getValue().isPowerOf2()) { Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), RHS->getValue().exactLogBase2()); - return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); + return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName()); } // -X/C --> X/-C provided the negation doesn't overflow. 
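[Editor's note: a standalone sketch — illustration only, not LLVM code, assuming the usual arithmetic right shift on signed values — of the "sdiv exact X, C --> ashr exact X, log2(C)" rewrite above. Exactness matters because a non-exact sdiv rounds toward zero while ashr rounds toward negative infinity:]

#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v <= 127; ++v) {
    if (v % 8 != 0)
      continue;                 // "exact" means no remainder by definition
    int8_t X = static_cast<int8_t>(v);
    assert(int8_t(X / 8) == int8_t(X >> 3));
  }
  // Without exactness the two differ, e.g. -1/8 == 0 but -1 >> 3 == -1.
  assert((-1 / 8) == 0 && (-1 >> 3) == -1);
  return 0;
}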
if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) - if (isa<Constant>(Sub->getOperand(0)) && - cast<Constant>(Sub->getOperand(0))->isNullValue() && - Sub->hasNoSignedWrap()) + if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap()) return BinaryOperator::CreateSDiv(Sub->getOperand(1), ConstantExpr::getNeg(RHS)); } @@ -500,9 +432,8 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); } - ConstantInt *ShiftedInt; - if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && - ShiftedInt->getValue().isPowerOf2()) { + + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have @@ -516,7 +447,12 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { - return commonDivTransforms(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + return 0; } /// This function implements the transforms on rem instructions that work @@ -551,6 +487,10 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { if (Instruction *common = commonRemTransforms(I)) return common; + // X % X == 0 + if (Op0 == Op1) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // 0 % X == 0 for integer, we don't need to preserve faults! if (Constant *LHS = dyn_cast<Constant>(Op0)) if (LHS->isNullValue()) @@ -588,42 +528,29 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Instruction *common = commonIRemTransforms(I)) return common; - if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // X urem C^2 -> X and C - // Check to see if this is an unsigned remainder with an exact power of 2, - // if so, convert to a bitwise and. - if (ConstantInt *C = dyn_cast<ConstantInt>(RHS)) - if (C->getValue().isPowerOf2()) - return BinaryOperator::CreateAnd(Op0, SubOne(C)); + // X urem C^2 -> X and C-1 + { const APInt *C; + if (match(Op1, m_Power2(C))) + return BinaryOperator::CreateAnd(Op0, + ConstantInt::get(I.getType(), *C-1)); } - if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) { - // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) - if (RHSI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(RHSI->getOperand(0))) { - if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) { - Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); - return BinaryOperator::CreateAnd(Op0, Add); - } - } + // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { + Constant *N1 = Constant::getAllOnesValue(I.getType()); + Value *Add = Builder->CreateAdd(Op1, N1, "tmp"); + return BinaryOperator::CreateAnd(Op0, Add); } - // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) - // where C1&C2 are powers of two. - if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) { - if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { - // STO == 0 and SFO == 0 handled above. 
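[Editor's note: a standalone sketch — illustration only, not LLVM code — of the visitURem rewrite above: an unsigned remainder by a power of two keeps only the low bits, the same fact behind the "A % (C << N) --> A & ((C << N)-1)" and select variants that follow:]

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x <= 255; ++x)
    for (unsigned k = 0; k < 8; ++k) {
      uint8_t X = uint8_t(x);
      uint8_t P = uint8_t(1u << k);
      // X urem 2^k  -->  X & (2^k - 1)
      assert(uint8_t(X % P) == uint8_t(X & (P - 1)));
    }
  return 0;
}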
- if ((STO->getValue().isPowerOf2()) && - (SFO->getValue().isPowerOf2())) { - Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), - SI->getName()+".t"); - Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), - SI->getName()+".f"); - return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); - } - } + // urem X, (select Cond, 2^C1, 2^C2) --> + // select Cond, (and X, C1-1), (and X, C2-1) + // when C1&C2 are powers of two. + { Value *Cond; const APInt *C1, *C2; + if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) { + Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t"); + Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f"); + return SelectInst::Create(Cond, TrueAnd, FalseAnd); + } } return 0; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index f7fc62f..297a18c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" @@ -30,22 +31,37 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { const Type *LHSType = LHSVal->getType(); const Type *RHSType = RHSVal->getType(); + bool isNUW = false, isNSW = false, isExact = false; + if (OverflowingBinaryOperator *BO = + dyn_cast<OverflowingBinaryOperator>(FirstInst)) { + isNUW = BO->hasNoUnsignedWrap(); + isNSW = BO->hasNoSignedWrap(); + } else if (PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(FirstInst)) + isExact = PEO->isExact(); + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); if (!I || I->getOpcode() != Opc || !I->hasOneUse() || // Verify type of the LHS matches so we don't fold cmp's of different - // types or GEP's with different index types. + // types. I->getOperand(0)->getType() != LHSType || I->getOperand(1)->getType() != RHSType) return 0; // If they are CmpInst instructions, check their predicates - if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) - if (cast<CmpInst>(I)->getPredicate() != - cast<CmpInst>(FirstInst)->getPredicate()) + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) return 0; + if (isNUW) + isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); + if (isNSW) + isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + if (isExact) + isExact = cast<PossiblyExactOperator>(I)->isExact(); + // Keep track of which operand needs a phi node. 
if (I->getOperand(0) != LHSVal) LHSVal = 0; if (I->getOperand(1) != RHSVal) RHSVal = 0; @@ -96,11 +112,17 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { } } - if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); - CmpInst *CIOp = cast<CmpInst>(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - LHSVal, RHSVal); + if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) + return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + LHSVal, RHSVal); + + BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst); + BinaryOperator *NewBinOp = + BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); + if (isNUW) NewBinOp->setHasNoUnsignedWrap(); + if (isNSW) NewBinOp->setHasNoSignedWrap(); + if (isExact) NewBinOp->setIsExact(); + return NewBinOp; } Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { @@ -117,6 +139,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // especially bad when the PHIs are in the header of a loop. bool NeededPhi = false; + bool AllInBounds = true; + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); @@ -124,6 +148,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { GEP->getNumOperands() != FirstInst->getNumOperands()) return 0; + AllInBounds &= GEP->isInBounds(); + // Keep track of whether or not all GEPs are of alloca pointers. if (AllBasePointersAreAllocas && (!isa<AllocaInst>(GEP->getOperand(0)) || @@ -201,11 +227,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { } Value *Base = FixedOperands[0]; - return cast<GEPOperator>(FirstInst)->isInBounds() ? - GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, - FixedOperands.end()) : + GetElementPtrInst *NewGEP = GetElementPtrInst::Create(Base, FixedOperands.begin()+1, FixedOperands.end()); + if (AllInBounds) NewGEP->setIsInBounds(); + return NewGEP; } @@ -368,6 +394,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // code size and simplifying code. Constant *ConstantOp = 0; const Type *CastSrcTy = 0; + bool isNUW = false, isNSW = false, isExact = false; if (isa<CastInst>(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); @@ -384,6 +411,14 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); if (ConstantOp == 0) return FoldPHIArgBinOpIntoPHI(PN); + + if (OverflowingBinaryOperator *BO = + dyn_cast<OverflowingBinaryOperator>(FirstInst)) { + isNUW = BO->hasNoUnsignedWrap(); + isNSW = BO->hasNoSignedWrap(); + } else if (PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(FirstInst)) + isExact = PEO->isExact(); } else { return 0; // Cannot fold this operation. } @@ -399,6 +434,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { } else if (I->getOperand(1) != ConstantOp) { return 0; } + + if (isNUW) + isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); + if (isNSW) + isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + if (isExact) + isExact = cast<PossiblyExactOperator>(I)->isExact(); } // Okay, they are all the same operation. 
Create a new PHI node of the @@ -433,8 +475,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType()); - if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) { + BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); + if (isNUW) BinOp->setHasNoUnsignedWrap(); + if (isNSW) BinOp->setHasNoSignedWrap(); + if (isExact) BinOp->setIsExact(); + return BinOp; + } CmpInst *CIOp = cast<CmpInst>(FirstInst); return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), @@ -731,8 +778,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { Instruction *InstCombiner::visitPHINode(PHINode &PN) { // If LCSSA is around, don't mess with Phi nodes if (MustPreserveLCSSA) return 0; - - if (Value *V = PN.hasConstantValue()) + + if (Value *V = SimplifyInstruction(&PN, TD)) return ReplaceInstUsesWith(PN, V); // If all PHI operands are the same operation, pull them through the PHI, diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index c44fe9d..97abc76 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -24,14 +24,14 @@ static SelectPatternFlavor MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); if (SI == 0) return SPF_UNKNOWN; - + ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); if (ICI == 0) return SPF_UNKNOWN; - + LHS = ICI->getOperand(0); RHS = ICI->getOperand(1); - - // (icmp X, Y) ? X : Y + + // (icmp X, Y) ? X : Y if (SI->getTrueValue() == ICI->getOperand(0) && SI->getFalseValue() == ICI->getOperand(1)) { switch (ICI->getPredicate()) { @@ -46,8 +46,8 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { case ICmpInst::ICMP_SLE: return SPF_SMIN; } } - - // (icmp X, Y) ? Y : X + + // (icmp X, Y) ? Y : X if (SI->getTrueValue() == ICI->getOperand(1) && SI->getFalseValue() == ICI->getOperand(0)) { switch (ICI->getPredicate()) { @@ -62,9 +62,9 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { case ICmpInst::ICMP_SLE: return SPF_SMAX; } } - + // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) - + return SPF_UNKNOWN; } @@ -136,7 +136,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), FI->getOperand(0), SI.getName()+".v"); InsertNewInstBefore(NewSI, SI); - return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, + return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); } @@ -195,7 +195,10 @@ static bool isSelect01(Constant *C1, Constant *C2) { ConstantInt *C2I = dyn_cast<ConstantInt>(C2); if (!C2I) return false; - return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne()); + if (!C1I->isZero() && !C2I->isZero()) // One side must be zero. 
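The FoldPHIArg* hunks above track nuw/nsw/exact while scanning the incoming values: the flags start from the first instruction and are and-ed against every subsequent one, so the folded operation keeps a flag only if all incoming operations carried it. A sketch of that intersection with hypothetical types (the real code queries OverflowingBinaryOperator and PossiblyExactOperator, as shown above):

    #include <cassert>
    #include <vector>

    struct Flags { bool nuw, nsw, exact; };

    // Keep a flag on the merged op only if every incoming op has it.
    static Flags intersect(const std::vector<Flags> &incoming) {
      Flags f = incoming.front();
      for (const Flags &g : incoming) {
        f.nuw &= g.nuw;
        f.nsw &= g.nsw;
        f.exact &= g.exact;
      }
      return f;
    }

    int main() {
      std::vector<Flags> ops = {{true, true, false}, {true, false, false}};
      Flags merged = intersect(ops);
      assert(merged.nuw && !merged.nsw && !merged.exact);
      return 0;
    }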
+ return false; + return C1I->isOne() || C1I->isAllOnesValue() || + C2I->isOne() || C2I->isAllOnesValue(); } /// FoldSelectIntoOp - Try fold the select into one of the operands to @@ -219,7 +222,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Constant *C = GetSelectFoldableConstant(TVI); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. + // between 0, 1 and -1. if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); InsertNewInstBefore(NewSel, SI); @@ -248,7 +251,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Constant *C = GetSelectFoldableConstant(FVI); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. + // between 0, 1 and -1. if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); InsertNewInstBefore(NewSel, SI); @@ -278,52 +281,95 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, Value *FalseVal = SI.getFalseValue(); // Check cases where the comparison is with a constant that - // can be adjusted to fit the min/max idiom. We may edit ICI in - // place here, so make sure the select is the only user. + // can be adjusted to fit the min/max idiom. We may move or edit ICI + // here, so make sure the select is the only user. if (ICI->hasOneUse()) if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) { + // X < MIN ? T : F --> F + if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT) + && CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) + return ReplaceInstUsesWith(SI, FalseVal); + // X > MAX ? T : F --> F + else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT) + && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) + return ReplaceInstUsesWith(SI, FalseVal); switch (Pred) { default: break; case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: { - // X < MIN ? T : F --> F - if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) - return ReplaceInstUsesWith(SI, FalseVal); - // X < C ? X : C-1 --> X > C-1 ? C-1 : X - Constant *AdjustedRHS = - ConstantInt::get(CI->getContext(), CI->getValue()-1); - if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } - break; - } + case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: { - // X > MAX ? T : F --> F - if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) - return ReplaceInstUsesWith(SI, FalseVal); + // These transformations only work for selects over integers. + const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType()); + if (!SelectTy) + break; + + Constant *AdjustedRHS; + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT) + AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1); + else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) + AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1); + // X > C ? X : C+1 --> X < C+1 ? C+1 : X - Constant *AdjustedRHS = - ConstantInt::get(CI->getContext(), CI->getValue()+1); + // X < C ? X : C-1 --> X > C-1 ? 
C-1 : X if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } + (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) + ; // Nothing to do here. Values match without any sign/zero extension. + + // Types do not match. Instead of calculating this with mixed types + // promote all to the larger type. This enables scalar evolution to + // analyze this expression. + else if (CmpRHS->getType()->getScalarSizeInBits() + < SelectTy->getBitWidth()) { + Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy); + + // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X + // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X + // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X + // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X + if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) && + sextRHS == FalseVal) { + CmpLHS = TrueVal; + AdjustedRHS = sextRHS; + } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) && + sextRHS == TrueVal) { + CmpLHS = FalseVal; + AdjustedRHS = sextRHS; + } else if (ICI->isUnsigned()) { + Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy); + // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X + // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X + // zext + signed compare cannot be changed: + // 0xff <s 0x00, but 0x00ff >s 0x0000 + if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) && + zextRHS == FalseVal) { + CmpLHS = TrueVal; + AdjustedRHS = zextRHS; + } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) && + zextRHS == TrueVal) { + CmpLHS = FalseVal; + AdjustedRHS = zextRHS; + } else + break; + } else + break; + } else + break; + + Pred = ICmpInst::getSwappedPredicate(Pred); + CmpRHS = AdjustedRHS; + std::swap(FalseVal, TrueVal); + ICI->setPredicate(Pred); + ICI->setOperand(0, CmpLHS); + ICI->setOperand(1, CmpRHS); + SI.setOperand(1, TrueVal); + SI.setOperand(2, FalseVal); + + // Move ICI instruction right before the select instruction. Otherwise + // the sext/zext value may be defined after the ICI instruction uses it. + ICI->moveBefore(&SI); + + Changed = true; break; } } @@ -399,28 +445,28 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, // can always be mapped. const Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return true; - + // If V is a PHI node defined in the same block as the condition PHI, we can // map the arguments. const PHINode *CondPHI = cast<PHINode>(SI.getCondition()); - + if (const PHINode *VP = dyn_cast<PHINode>(I)) if (VP->getParent() == CondPHI->getParent()) return true; - + // Otherwise, if the PHI and select are defined in the same block and if V is // defined in a different block, then we can transform it. if (SI.getParent() == CondPHI->getParent() && I->getParent() != CondPHI->getParent()) return true; - + // Otherwise we have a 'hard' case and we can't tell without doing more // detailed dominator based analysis, punt. return false; } /// FoldSPFofSPF - We have an SPF (e.g. 
a min or max) of an SPF of the form: -/// SPF2(SPF1(A, B), C) +/// SPF2(SPF1(A, B), C) Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, Value *A, Value *B, @@ -431,7 +477,7 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, // MIN(MIN(a, b), a) -> MIN(a, b) if (SPF1 == SPF2) return ReplaceInstUsesWith(Outer, Inner); - + // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || @@ -440,13 +486,82 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) return ReplaceInstUsesWith(Outer, C); } - + // TODO: MIN(MIN(A, 23), 97) return 0; } +/// foldSelectICmpAnd - If one of the constants is zero (we know they can't +/// both be) and we have an icmp instruction with zero, and we have an 'and' +/// with the non-constant value and a power of two we can turn the select +/// into a shift on the result of the 'and'. +static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, + ConstantInt *FalseVal, + InstCombiner::BuilderTy *Builder) { + const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); + if (!IC || !IC->isEquality()) + return 0; + + if (ConstantInt *C = dyn_cast<ConstantInt>(IC->getOperand(1))) + if (!C->isZero()) + return 0; + ConstantInt *AndRHS; + Value *LHS = IC->getOperand(0); + if (LHS->getType() != SI.getType() || + !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) + return 0; + + // If both select arms are non-zero see if we have a select of the form + // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic + // for 'x ? 2^n : 0' and fix the thing up at the end. + ConstantInt *Offset = 0; + if (!TrueVal->isZero() && !FalseVal->isZero()) { + if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) + Offset = FalseVal; + else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) + Offset = TrueVal; + else + return 0; + + // Adjust TrueVal and FalseVal to the offset. + TrueVal = ConstantInt::get(Builder->getContext(), + TrueVal->getValue() - Offset->getValue()); + FalseVal = ConstantInt::get(Builder->getContext(), + FalseVal->getValue() - Offset->getValue()); + } + + // Make sure the mask in the 'and' and one of the select arms is a power of 2. + if (!AndRHS->getValue().isPowerOf2() || + (!TrueVal->getValue().isPowerOf2() && + !FalseVal->getValue().isPowerOf2())) + return 0; + + // Determine which shift is needed to transform result of the 'and' into the + // desired result. + ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal; + unsigned ValZeros = ValC->getValue().logBase2(); + unsigned AndZeros = AndRHS->getValue().logBase2(); + + Value *V = LHS; + if (ValZeros > AndZeros) + V = Builder->CreateShl(V, ValZeros - AndZeros); + else if (ValZeros < AndZeros) + V = Builder->CreateLShr(V, AndZeros - ValZeros); + + // Okay, now we know that everything is set up, we just don't know whether we + // have a icmp_ne or icmp_eq and whether the true or false val is the zero. + bool ShouldNotVal = !TrueVal->isZero(); + ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; + if (ShouldNotVal) + V = Builder->CreateXor(V, ValC); + + // Apply an offset if needed. 
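The new foldSelectICmpAnd helper, whose body runs through the hunk above, turns a select on `(X & 2^n) == 0` with power-of-two arms into a shift of the `and` result, and when both arms are non-zero it first offsets them so one arm becomes zero, then re-applies the offset at the end. A standalone check of both cases, with constants chosen purely for illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 100000; ++x) {
        // ((X & 8) ? 32 : 0)  -->  (X & 8) << 2
        assert(((x & 8u) ? 32u : 0u) == ((x & 8u) << 2));
        // Both arms non-zero: ((X & 8) ? 35 : 3) is the same select
        // offset by 3, so shift first, then re-apply the offset.
        assert(((x & 8u) ? 35u : 3u) == (((x & 8u) << 2) + 3u));
      }
      return 0;
    }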
+ if (Offset) + V = Builder->CreateAdd(V, Offset); + return V; +} Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); @@ -478,7 +593,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { "not."+CondVal->getName()), SI); return BinaryOperator::CreateOr(NotCond, TrueVal); } - + // select a, b, a -> a&b // select a, a, b -> a|b if (CondVal == TrueVal) @@ -497,7 +612,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select C, -1, 0 -> sext C to int if (FalseValC->isZero() && TrueValC->isAllOnesValue()) return new SExtInst(CondVal, SI.getType()); - + // select C, 0, 1 -> zext !C to int if (TrueValC->isZero() && FalseValC->getValue() == 1) { Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); @@ -509,32 +624,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return new SExtInst(NotCond, SI.getType()); } - - if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { - // If one of the constants is zero (we know they can't both be) and we - // have an icmp instruction with zero, and we have an 'and' with the - // non-constant value, eliminate this whole mess. This corresponds to - // cases like this: ((X & 27) ? 27 : 0) - if (TrueValC->isZero() || FalseValC->isZero()) - if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) && - cast<Constant>(IC->getOperand(1))->isNullValue()) - if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0))) - if (ICA->getOpcode() == Instruction::And && - isa<ConstantInt>(ICA->getOperand(1)) && - (ICA->getOperand(1) == TrueValC || - ICA->getOperand(1) == FalseValC) && - cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) { - // Okay, now we know that everything is set up, we just don't - // know whether we have a icmp_ne or icmp_eq and whether the - // true or false val is the zero. - bool ShouldNotVal = !TrueValC->isZero(); - ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; - Value *V = ICA; - if (ShouldNotVal) - V = Builder->CreateXor(V, ICA->getOperand(1)); - return ReplaceInstUsesWith(SI, V); - } - } + + if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder)) + return ReplaceInstUsesWith(SI, V); } // See if we are selecting two values based on a comparison of the two values. @@ -542,7 +634,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { // Transform (X == Y) ? X : Y -> Y if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -554,7 +646,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // Transform (X une Y) ? X : Y -> X if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -569,7 +661,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ // Transform (X == Y) ? 
Y : X -> X if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -581,7 +673,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // Transform (X une Y) ? Y : X -> Y if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -639,6 +731,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *NegVal; // Compute -Z if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { NegVal = ConstantExpr::getNeg(C); + } else if (SI.getType()->isFloatingPointTy()) { + NegVal = InsertNewInstBefore( + BinaryOperator::CreateFNeg(SubOp->getOperand(1), + "tmp"), SI); } else { NegVal = InsertNewInstBefore( BinaryOperator::CreateNeg(SubOp->getOperand(1), @@ -654,7 +750,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { NewFalseOp, SI.getName() + ".p"); NewSel = InsertNewInstBefore(NewSel, SI); - return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); + if (SI.getType()->isFloatingPointTy()) + return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel); + else + return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); } } } @@ -663,7 +762,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SI.getType()->isIntegerTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; - + // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a @@ -686,13 +785,26 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // See if we can fold the select into a phi node if the condition is a select. - if (isa<PHINode>(SI.getCondition())) + if (isa<PHINode>(SI.getCondition())) // The true/false values have to be live in the PHI predecessor's blocks. 
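The sub hunk above rewrites `select C, (Z - A), (Z - B)` as `Z + (select C, -A, -B)`, and this patch adds the floating-point counterparts (FNeg/FAdd) alongside the existing integer Neg/Add path. The integer shape of the rewrite, as an illustrative check:

    #include <cassert>

    int main() {
      for (int z = -20; z <= 20; ++z)
        for (int a = -20; a <= 20; ++a)
          for (int b = -20; b <= 20; ++b)
            for (int c = 0; c <= 1; ++c)
              // select C, (Z - A), (Z - B)  -->  Z + (select C, -A, -B)
              assert((c ? z - a : z - b) == z + (c ? -a : -b));
      return 0;
    }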
if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) if (Instruction *NV = FoldOpIntoPhi(SI)) return NV; + if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { + if (TrueSI->getCondition() == CondVal) { + SI.setOperand(1, TrueSI->getTrueValue()); + return &SI; + } + } + if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { + if (FalseSI->getCondition() == CondVal) { + SI.setOperand(2, FalseSI->getFalseValue()); + return &SI; + } + } + if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); SI.setOperand(1, FalseVal); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 27716b8..a7f8005 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -21,25 +22,6 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // shl X, 0 == X and shr X, 0 == X - // shl 0, X == 0 and shr 0, X == 0 - if (Op1 == Constant::getNullValue(Op1->getType()) || - Op0 == Constant::getNullValue(Op0->getType())) - return ReplaceInstUsesWith(I, Op0); - - if (isa<UndefValue>(Op0)) { - if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef - return ReplaceInstUsesWith(I, Op0); - else // undef << X -> 0, undef >>u X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa<UndefValue>(Op1)) { - if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X - return ReplaceInstUsesWith(I, Op0); - else // X << undef, X >>u undef -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - // See if we can fold away this shift. if (SimplifyDemandedInstructionBits(I)) return &I; @@ -53,6 +35,20 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; + + // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. + // Because shifts by negative values (which could occur if A were negative) + // are undefined. + Value *A; const APInt *B; + if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) { + // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't + // demand the sign bit (and many others) here?? + Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1), + Op1->getName()); + I.setOperand(1, Rem); + return &I; + } + return 0; } @@ -81,7 +77,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // if the needed bits are already zero in the input. This allows us to reuse // the value which means that we don't care if the shift has multiple uses. // TODO: Handle opposite shift by exact value. 
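The new shift transform above replaces a shift amount of the form `A srem B` (B a power of two) with `A & (B - 1)`. As the hunk's comment says, a shift by a negative amount is undefined, so only executions where the srem result is non-negative need be preserved, and on those the srem and the mask agree. A small check over the defined range:

    #include <cassert>

    int main() {
      // For non-negative A and a power-of-two B, A srem B == A & (B - 1).
      // (Negative A would give a negative srem result, i.e. an undefined
      // shift amount, so those executions need not be preserved.)
      for (int a = 0; a < 100000; ++a)
        assert(a % 8 == (a & 7));
      return 0;
    }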
- ConstantInt *CI; + ConstantInt *CI = 0; if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { if (CI->getZExtValue() == NumBits) { @@ -131,9 +127,9 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't // profitable unless we know the and'd out bits are already zero. if (CI->getZExtValue() > NumBits) { - unsigned HighBits = CI->getZExtValue() - NumBits; + unsigned LowBits = TypeWidth - CI->getZExtValue(); if (MaskedValueIsZero(I->getOperand(0), - APInt::getHighBitsSet(TypeWidth, HighBits))) + APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) return true; } @@ -157,7 +153,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, if (CI->getZExtValue() > NumBits) { unsigned LowBits = CI->getZExtValue() - NumBits; if (MaskedValueIsZero(I->getOperand(0), - APInt::getLowBitsSet(TypeWidth, LowBits))) + APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) return true; } @@ -622,16 +618,49 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } Instruction *InstCombiner::visitShl(BinaryOperator &I) { - return commonShiftTransforms(I); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), + I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + TD)) + return ReplaceInstUsesWith(I, V); + + if (Instruction *V = commonShiftTransforms(I)) + return V; + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) { + unsigned ShAmt = Op1C->getZExtValue(); + + // If the shifted-out value is known-zero, then this is a NUW shift. + if (!I.hasNoUnsignedWrap() && + MaskedValueIsZero(I.getOperand(0), + APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) { + I.setHasNoUnsignedWrap(); + return &I; + } + + // If the shifted out value is all signbits, this is a NSW shift. + if (!I.hasNoSignedWrap() && + ComputeNumSignBits(I.getOperand(0)) > ShAmt) { + I.setHasNoSignedWrap(); + return &I; + } + } + + return 0; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), + I.isExact(), TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *R = commonShiftTransforms(I)) return R; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + unsigned ShAmt = Op1C->getZExtValue(); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) { unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); // ctlz.i32(x)>>5 --> zext(x == 0) @@ -640,7 +669,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { if ((II->getIntrinsicID() == Intrinsic::ctlz || II->getIntrinsicID() == Intrinsic::cttz || II->getIntrinsicID() == Intrinsic::ctpop) && - isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){ + isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) { bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop; Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0); Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); @@ -648,29 +677,37 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { } } + // If the shifted-out value is known-zero, then this is an exact shift. 
+ if (!I.isExact() && + MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ + I.setIsExact(); + return &I; + } + } + return 0; } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), + I.isExact(), TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *R = commonShiftTransforms(I)) return R; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) { - // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) - if (CSI->isAllOnesValue()) - return ReplaceInstUsesWith(I, CSI); - } - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + unsigned ShAmt = Op1C->getZExtValue(); + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we // have a sign-extend idiom. Value *X; if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { - // If the input value is known to already be sign extended enough, delete - // the extension. - if (ComputeNumSignBits(X) > Op1C->getZExtValue()) + // If the left shift is just shifting out partial signbits, delete the + // extension. + if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) return ReplaceInstUsesWith(I, X); // If the input is an extension from the shifted amount value, e.g. @@ -685,6 +722,13 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { return new SExtInst(ZI->getOperand(0), ZI->getType()); } } + + // If the shifted-out value is known-zero, then this is an exact shift. + if (!I.isExact() && + MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ + I.setIsExact(); + return &I; + } } // See if we can turn a signed shr into an unsigned shr. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index adf7a76..bda8cea 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -34,7 +34,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, if (!OpC) return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); + Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); if ((~Demanded & OpC->getValue()) == 0) return false; @@ -121,13 +121,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } if (isa<ConstantPointerNull>(V)) { // We know all of the bits for a constant! - KnownOne.clear(); + KnownOne.clearAllBits(); KnownZero = DemandedMask; return 0; } - KnownZero.clear(); - KnownOne.clear(); + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); if (DemandedMask == 0) { // Not demanding any bits from V. 
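The visitAShr hunk above recognizes `ashr (shl X, C), C` as a sign-extension idiom and, with this change, deletes it outright when the inner shl carries nsw: an nsw left shift only shifts out copies of the sign bit, so shifting back recovers X exactly. A standalone illustration of the idiom itself (a sketch that assumes the usual arithmetic right shift for negative values, which C++ compilers conventionally provide and C++20 guarantees):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x = -300; x <= 300; ++x) {
        int32_t shifted = (int32_t)((uint32_t)x << 24); // shl X, 24
        // ashr (shl X, 24), 24 sign-extends the low 8 bits of X.
        assert((shifted >> 24) == (int32_t)(int8_t)(uint8_t)x);
      }
      return 0;
    }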
if (isa<UndefValue>(V)) return 0; @@ -388,15 +388,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::Trunc: { unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.zext(truncBf); - KnownZero.zext(truncBf); - KnownOne.zext(truncBf); + DemandedMask = DemandedMask.zext(truncBf); + KnownZero = KnownZero.zext(truncBf); + KnownOne = KnownOne.zext(truncBf); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) return I; - DemandedMask.trunc(BitWidth); - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); + DemandedMask = DemandedMask.trunc(BitWidth); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; } @@ -426,15 +426,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.trunc(SrcBitWidth); - KnownZero.trunc(SrcBitWidth); - KnownOne.trunc(SrcBitWidth); + DemandedMask = DemandedMask.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) return I; - DemandedMask.zext(BitWidth); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + DemandedMask = DemandedMask.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // The top bits are known to be zero. KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); @@ -451,17 +451,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. if ((NewBits & DemandedMask) != 0) - InputDemandedBits.set(SrcBitWidth-1); + InputDemandedBits.setBit(SrcBitWidth-1); - InputDemandedBits.trunc(SrcBitWidth); - KnownZero.trunc(SrcBitWidth); - KnownOne.trunc(SrcBitWidth); + InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero, KnownOne, Depth+1)) return I; - InputDemandedBits.zext(BitWidth); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + InputDemandedBits = InputDemandedBits.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -576,8 +576,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); + + // If the shift is NUW/NSW, then it does demand the high bits. 
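The Shl case above computes the input's demanded bits as `DemandedMask.lshr(ShiftAmt)`, and the patch additionally re-demands the high bits when the shift carries nuw/nsw, since those flags make claims about the bits that get shifted out. The basic lshr-of-the-mask step is easy to check concretely: if a user only looks at the low 8 bits of `x << 3`, only the low 5 bits of `x` matter, so clearing the rest cannot change the result. An illustrative check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 100000; ++x)
        // Demanding 0xFF of (x << 3) demands only 0xFF >> 3 == 0x1F of x.
        assert(((x << 3) & 0xFFu) == (((x & 0x1Fu) << 3) & 0xFFu));
      return 0;
    }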
+ ShlOperator *IOp = cast<ShlOperator>(I); + if (IOp->hasNoSignedWrap()) + DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); + else if (IOp->hasNoUnsignedWrap()) + DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -592,10 +600,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, case Instruction::LShr: // For a logical shift right if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Unsigned shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (cast<LShrOperator>(I)->isExact()) + DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -627,14 +641,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I->getOperand(0); if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); // If any of the "high bits" are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) - DemandedMaskIn.set(BitWidth-1); + DemandedMaskIn.setBit(BitWidth-1); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (cast<AShrOperator>(I)->isExact()) + DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -793,10 +813,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned i = 0; i != VWidth; ++i) if (!DemandedElts[i]) { // If not demanded, set to undef. Elts.push_back(Undef); - UndefElts.set(i); + UndefElts.setBit(i); } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef. Elts.push_back(Undef); - UndefElts.set(i); + UndefElts.setBit(i); } else { // Otherwise, defined. Elts.push_back(CV->getOperand(i)); } @@ -879,13 +899,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // Otherwise, the element inserted overwrites whatever was there, so the // input demanded set is simpler than the output set. APInt DemandedElts2 = DemandedElts; - DemandedElts2.clear(IdxNo); + DemandedElts2.clearBit(IdxNo); TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, UndefElts, Depth+1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } // The inserted element is defined. 
- UndefElts.clear(IdxNo); + UndefElts.clearBit(IdxNo); break; } case Instruction::ShuffleVector: { @@ -900,9 +920,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, assert(MaskVal < LHSVWidth * 2 && "shufflevector mask index out of range!"); if (MaskVal < LHSVWidth) - LeftDemanded.set(MaskVal); + LeftDemanded.setBit(MaskVal); else - RightDemanded.set(MaskVal - LHSVWidth); + RightDemanded.setBit(MaskVal - LHSVWidth); } } } @@ -921,16 +941,16 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { - UndefElts.set(i); + UndefElts.setBit(i); } else if (MaskVal < LHSVWidth) { if (UndefElts4[MaskVal]) { NewUndefElts = true; - UndefElts.set(i); + UndefElts.setBit(i); } } else { if (UndefElts3[MaskVal - LHSVWidth]) { NewUndefElts = true; - UndefElts.set(i); + UndefElts.setBit(i); } } } @@ -973,7 +993,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Ratio = VWidth/InVWidth; for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { if (DemandedElts[OutIdx]) - InputDemandedElts.set(OutIdx/Ratio); + InputDemandedElts.setBit(OutIdx/Ratio); } } else { // Untested so far. @@ -985,7 +1005,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Ratio = InVWidth/VWidth; for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) if (DemandedElts[InIdx/Ratio]) - InputDemandedElts.set(InIdx); + InputDemandedElts.setBit(InIdx); } // div/rem demand all inputs, because they don't want divide by zero. @@ -1004,7 +1024,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // undef. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) if (UndefElts2[OutIdx/Ratio]) - UndefElts.set(OutIdx); + UndefElts.setBit(OutIdx); } else if (VWidth < InVWidth) { llvm_unreachable("Unimp"); // If there are more elements in the source than there are in the result, @@ -1013,7 +1033,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) if (!UndefElts2[InIdx]) // Not undef? - UndefElts.clear(InIdx/Ratio); // Clear undef bit. + UndefElts.clearBit(InIdx/Ratio); // Clear undef bit. } break; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index a58124d..5caa12d 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -18,7 +18,7 @@ using namespace llvm; /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. static bool CheapToScalarize(Value *V, bool isConstant) { - if (isa<ConstantAggregateZero>(V)) + if (isa<ConstantAggregateZero>(V)) return true; if (ConstantVector *C = dyn_cast<ConstantVector>(V)) { if (isConstant) return true; @@ -31,7 +31,7 @@ static bool CheapToScalarize(Value *V, bool isConstant) { } Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // Insert element gets simplified to the inserted element or is deleted if // this is constant idx extract element and its a constant idx insertelt. 
if (I->getOpcode() == Instruction::InsertElement && isConstant && @@ -49,26 +49,24 @@ static bool CheapToScalarize(Value *V, bool isConstant) { (CheapToScalarize(CI->getOperand(0), isConstant) || CheapToScalarize(CI->getOperand(1), isConstant))) return true; - + return false; } -/// Read and decode a shufflevector mask. -/// -/// It turns undef elements into values that are larger than the number of -/// elements in the input. -static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { +/// getShuffleMask - Read and decode a shufflevector mask. +/// Turn undef elements into negative values. +static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) { unsigned NElts = SVI->getType()->getNumElements(); if (isa<ConstantAggregateZero>(SVI->getOperand(2))) - return std::vector<unsigned>(NElts, 0); + return std::vector<int>(NElts, 0); if (isa<UndefValue>(SVI->getOperand(2))) - return std::vector<unsigned>(NElts, 2*NElts); - - std::vector<unsigned> Result; + return std::vector<int>(NElts, -1); + + std::vector<int> Result; const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2)); for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) if (isa<UndefValue>(*i)) - Result.push_back(NElts*2); // undef -> 8 + Result.push_back(-1); // undef else Result.push_back(cast<ConstantInt>(*i)->getZExtValue()); return Result; @@ -83,42 +81,41 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(PTy->getElementType()); - + if (isa<UndefValue>(V)) return UndefValue::get(PTy->getElementType()); if (isa<ConstantAggregateZero>(V)) return Constant::getNullValue(PTy->getElementType()); if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) return CP->getOperand(EltNo); - + if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. - if (!isa<ConstantInt>(III->getOperand(2))) + if (!isa<ConstantInt>(III->getOperand(2))) return 0; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); - + // If this is an insert to the element we are looking for, return the // inserted value. - if (EltNo == IIElt) + if (EltNo == IIElt) return III->getOperand(1); - + // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return FindScalarElement(III->getOperand(0), EltNo); } - + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); - unsigned InEl = getShuffleMask(SVI)[EltNo]; - if (InEl < LHSWidth) - return FindScalarElement(SVI->getOperand(0), InEl); - else if (InEl < LHSWidth*2) - return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); - else + cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); + int InEl = getShuffleMask(SVI)[EltNo]; + if (InEl < 0) return UndefValue::get(PTy->getElementType()); + if (InEl < (int)LHSWidth) + return FindScalarElement(SVI->getOperand(0), InEl); + return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } - + // Otherwise, we don't know. return 0; } @@ -127,11 +124,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is undef, replace extract with scalar undef. if (isa<UndefValue>(EI.getOperand(0))) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - + // If vector val is constant 0, replace extract with scalar 0. 
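The mask handling in this file is reworked around a new convention: getShuffleMask now returns `std::vector<int>` with -1 marking an undef lane, instead of encoding undef as an out-of-range unsigned index at or above 2*NElts. A sketch of how a two-input mask is interpreted under that convention, with hypothetical names (the real decoding lives in FindScalarElement above; 0 stands in for an undef lane only so the assertion below has something to compare):

    #include <cassert>
    #include <vector>

    // mask[i] < 0 means undef; [0, n) selects from lhs, [n, 2n) from rhs.
    static std::vector<int> shuffle(const std::vector<int> &lhs,
                                    const std::vector<int> &rhs,
                                    const std::vector<int> &mask) {
      int n = (int)lhs.size();
      std::vector<int> out;
      for (int m : mask)
        out.push_back(m < 0 ? 0 : (m < n ? lhs[m] : rhs[m - n]));
      return out;
    }

    int main() {
      std::vector<int> a = {10, 11, 12, 13}, b = {20, 21, 22, 23};
      // <0, 5, -1, 3>: lane 0 of a, lane 1 of b, undef, lane 3 of a.
      assert(shuffle(a, b, {0, 5, -1, 3}) == (std::vector<int>{10, 21, 0, 13}));
      return 0;
    }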
if (isa<ConstantAggregateZero>(EI.getOperand(0))) return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); - + if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) { // If vector val is constant with all elements the same, replace EI with // that element. When the elements are not identical, we cannot replace yet @@ -139,53 +136,53 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Constant *op0 = C->getOperand(0); for (unsigned i = 1; i != C->getNumOperands(); ++i) if (C->getOperand(i) != op0) { - op0 = 0; + op0 = 0; break; } if (op0) return ReplaceInstUsesWith(EI, op0); } - + // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - + // If this is extracting an invalid index, turn this into undef, to avoid // crashing the code below. if (IndexVal >= VectorWidth) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - + // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use // property. if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { APInt UndefElts(VectorWidth, 0); APInt DemandedMask(VectorWidth, 0); - DemandedMask.set(IndexVal); + DemandedMask.setBit(IndexVal); if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, UndefElts)) { EI.setOperand(0, V); return &EI; } } - + if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) return ReplaceInstUsesWith(EI, Elt); - + // If the this extractelement is directly using a bitcast from a vector of // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { - if (const VectorType *VT = + if (const VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); } } - + if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { // Push extractelement into predecessor operation if legal and // profitable to do so @@ -193,11 +190,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (I->hasOneUse() && CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { Value *newEI0 = - Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), - EI.getName()+".lhs"); + Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); Value *newEI1 = - Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), - EI.getName()+".rhs"); + Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); } } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { @@ -215,21 +212,22 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. 
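The transform whose comment closes above (its body continues below) redirects `extractelement (shufflevector LHS, RHS, Mask), i` to extract directly from whichever input lane `Mask[i]` names, or to undef when `Mask[i]` is -1. In array terms, lane i of the materialized shuffle is by construction the same value as the source lane the mask names, so the intermediate shuffle can be bypassed; a small illustrative check:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> lhs = {10, 11, 12, 13}, rhs = {20, 21, 22, 23};
      std::vector<int> mask = {2, 6, 1, 5};
      int n = (int)lhs.size();

      // Materialize the shuffle...
      std::vector<int> shuffled;
      for (int m : mask)
        shuffled.push_back(m < n ? lhs[m] : rhs[m - n]);

      // ...then confirm that extracting lane i from it matches extracting
      // straight from the source lane that mask[i] names.
      for (int i = 0; i < n; ++i) {
        int direct = mask[i] < n ? lhs[mask[i]] : rhs[mask[i] - n];
        assert(shuffled[i] == direct);
      }
      return 0;
    }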
if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { - unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; + int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; Value *Src; unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); - - if (SrcIdx < LHSWidth) + cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); + + if (SrcIdx < 0) + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + if (SrcIdx < (int)LHSWidth) Src = SVI->getOperand(0); - else if (SrcIdx < LHSWidth*2) { + else { SrcIdx -= LHSWidth; Src = SVI->getOperand(1); - } else { - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); } + const Type *Int32Ty = Type::getInt32Ty(EI.getContext()); return ExtractElementInst::Create(Src, - ConstantInt::get(Type::getInt32Ty(EI.getContext()), + ConstantInt::get(Int32Ty, SrcIdx, false)); } } @@ -239,42 +237,42 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns -/// elements from either LHS or RHS, return the shuffle mask and true. +/// elements from either LHS or RHS, return the shuffle mask and true. /// Otherwise, return false. static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, std::vector<Constant*> &Mask) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); - + if (isa<UndefValue>(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return true; } - + if (V == LHS) { for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); return true; } - + if (V == RHS) { for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i+NumElts)); return true; } - + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. Value *VecOp = IEI->getOperand(0); Value *ScalarOp = IEI->getOperand(1); Value *IdxOp = IEI->getOperand(2); - + if (!isa<ConstantInt>(IdxOp)) return false; unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. // Okay, we can handle this if the vector we are insertinting into is // transitively ok. @@ -282,13 +280,13 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, // If so, update the mask to reflect the inserted undef. Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); return true; - } + } } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ if (isa<ConstantInt>(EI->getOperand(1)) && EI->getOperand(0)->getType() == V->getType()) { unsigned ExtractedIdx = cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); - + // This must be extracting from either LHS or RHS. if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { // Okay, we can handle this if the vector we are insertinting into is @@ -296,15 +294,14 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted value. 
if (EI->getOperand(0) == LHS) { - Mask[InsertedIdx % NumElts] = + Mask[InsertedIdx % NumElts] = ConstantInt::get(Type::getInt32Ty(V->getContext()), ExtractedIdx); } else { assert(EI->getOperand(0) == RHS); - Mask[InsertedIdx % NumElts] = + Mask[InsertedIdx % NumElts] = ConstantInt::get(Type::getInt32Ty(V->getContext()), ExtractedIdx+NumElts); - } return true; } @@ -313,7 +310,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, } } // TODO: Handle shufflevector here! - + return false; } @@ -322,11 +319,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// that computes V and the LHS value of the shuffle. static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, Value *&RHS) { - assert(V->getType()->isVectorTy() && + assert(V->getType()->isVectorTy() && (RHS == 0 || V->getType() == RHS->getType()) && "Invalid shuffle!"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); - + if (isa<UndefValue>(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return V; @@ -338,25 +335,25 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, Value *VecOp = IEI->getOperand(0); Value *ScalarOp = IEI->getOperand(1); Value *IdxOp = IEI->getOperand(2); - + if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && EI->getOperand(0)->getType() == V->getType()) { unsigned ExtractedIdx = - cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + // Either the extracted from or inserted into vector must be RHSVec, // otherwise we'd end up with a shuffle of three inputs. if (EI->getOperand(0) == RHS || RHS == 0) { RHS = EI->getOperand(0); Value *V = CollectShuffleElements(VecOp, Mask, RHS); - Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::getInt32Ty(V->getContext()), - NumElts+ExtractedIdx); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + NumElts+ExtractedIdx); return V; } - + if (VecOp == RHS) { Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); // Everything but the extracted element is replaced with the RHS. @@ -367,7 +364,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, } return V; } - + // If this insertelement is a chain that comes from exactly these two // vectors, return the vector and the effective shuffle. if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) @@ -376,7 +373,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, } } // TODO: Handle shufflevector here! - + // Otherwise, can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); @@ -387,32 +384,32 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); Value *IdxOp = IE.getOperand(2); - + // Inserting an undef or into an undefined place, remove this. if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) ReplaceInstUsesWith(IE, VecOp); - - // If the inserted element was extracted from some other vector, and if the + + // If the inserted element was extracted from some other vector, and if the // indexes are constant, try to turn this into a shufflevector operation. 
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && EI->getOperand(0)->getType() == IE.getType()) { unsigned NumVectorElts = IE.getType()->getNumElements(); unsigned ExtractedIdx = - cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + if (ExtractedIdx >= NumVectorElts) // Out of range extract. return ReplaceInstUsesWith(IE, VecOp); - + if (InsertedIdx >= NumVectorElts) // Out of range insert. return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); - + // If we are extracting a value from a vector, then inserting it right // back into the same place, just use the input vector. if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) - return ReplaceInstUsesWith(IE, VecOp); - + return ReplaceInstUsesWith(IE, VecOp); + // If this insertelement isn't used by some other insertelement, turn it // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { @@ -421,18 +418,20 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Value *LHS = CollectShuffleElements(&IE, Mask, RHS); if (RHS == 0) RHS = UndefValue::get(LHS->getType()); // We now have a shuffle of LHS, RHS, Mask. - return new ShuffleVectorInst(LHS, RHS, - ConstantVector::get(Mask)); + return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask)); } } } - + unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) + if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { + if (V != &IE) + return ReplaceInstUsesWith(IE, V); return &IE; - + } + return 0; } @@ -440,27 +439,29 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); - std::vector<unsigned> Mask = getShuffleMask(&SVI); - + std::vector<int> Mask = getShuffleMask(&SVI); + bool MadeChange = false; - + // Undefined shuffle mask -> undefined value. if (isa<UndefValue>(SVI.getOperand(2))) return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); - + unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); - + if (VWidth != cast<VectorType>(LHS->getType())->getNumElements()) return 0; - + APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (V != &SVI) + return ReplaceInstUsesWith(SVI, V); LHS = SVI.getOperand(0); RHS = SVI.getOperand(1); MadeChange = true; } - + // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). if (LHS == RHS || isa<UndefValue>(LHS)) { @@ -468,16 +469,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // shuffle(undef,undef,mask) -> undef. return ReplaceInstUsesWith(SVI, LHS); } - + // Remap any references to RHS to use LHS. 
std::vector<Constant*> Elts; for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= 2*e) + if (Mask[i] < 0) Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); else { - if ((Mask[i] >= e && isa<UndefValue>(RHS)) || - (Mask[i] < e && isa<UndefValue>(LHS))) { - Mask[i] = 2*e; // Turn into undef. + if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || + (Mask[i] < (int)e && isa<UndefValue>(LHS))) { + Mask[i] = -1; // Turn into undef. Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); } else { Mask[i] = Mask[i] % e; // Force to LHS. @@ -493,59 +494,65 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { RHS = SVI.getOperand(1); MadeChange = true; } - + // Analyze the shuffle: is the LHS or RHS an identity shuffle? bool isLHSID = true, isRHSID = true; - + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= e*2) continue; // Ignore undef values. + if (Mask[i] < 0) continue; // Ignore undef values. // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == i); - + isLHSID &= (Mask[i] == (int)i); + // Is this an identity shuffle of the RHS value? isRHSID &= (Mask[i]-e == i); } - + // Eliminate identity shuffles. if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); - + // If the LHS is a shufflevector itself, see if we can combine it with this // one without producing an unusual shuffle. Here we are really conservative: // we are absolutely afraid of producing a shuffle mask not in the input // program, because the code gen may not be smart enough to turn a merged // shuffle into two specific shuffles: it may produce worse code. As such, - // we only merge two shuffles if the result is one of the two input shuffle - // masks. In this case, merging the shuffles just removes one instruction, - // which we know is safe. This is good for things like turning: - // (splat(splat)) -> splat. + // we only merge two shuffles if the result is either a splat or one of the + // two input shuffle masks. In this case, merging the shuffles just removes + // one instruction, which we know is safe. This is good for things like + // turning: (splat(splat)) -> splat. if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) { if (isa<UndefValue>(RHS)) { - std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI); - + std::vector<int> LHSMask = getShuffleMask(LHSSVI); + if (LHSMask.size() == Mask.size()) { - std::vector<unsigned> NewMask; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) - if (Mask[i] >= e) - NewMask.push_back(2*e); + std::vector<int> NewMask; + bool isSplat = true; + int SplatElt = -1; // undef + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + int MaskElt; + if (Mask[i] < 0 || Mask[i] >= (int)e) + MaskElt = -1; // undef else - NewMask.push_back(LHSMask[Mask[i]]); - + MaskElt = LHSMask[Mask[i]]; + // Check if this could still be a splat. + if (MaskElt >= 0) { + if (SplatElt >=0 && SplatElt != MaskElt) + isSplat = false; + SplatElt = MaskElt; + } + NewMask.push_back(MaskElt); + } + // If the result mask is equal to the src shuffle or this // shuffle mask, do the replacement.
- if (NewMask == LHSMask || NewMask == Mask) { - unsigned LHSInNElts = - cast<VectorType>(LHSSVI->getOperand(0)->getType())-> - getNumElements(); + if (isSplat || NewMask == LHSMask || NewMask == Mask) { std::vector<Constant*> Elts; + const Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get( - Type::getInt32Ty(SVI.getContext()))); + if (NewMask[i] < 0) { + Elts.push_back(UndefValue::get(Int32Ty)); } else { - Elts.push_back(ConstantInt::get( - Type::getInt32Ty(SVI.getContext()), - NewMask[i])); + Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i])); } } return new ShuffleVectorInst(LHSSVI->getOperand(0), @@ -555,7 +562,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { } } } - + return MadeChange ? &SVI : 0; } - diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index e46c679..37123d0 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/PatternMatch.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm-c/Initialization.h" #include <algorithm> #include <climits> using namespace llvm; @@ -57,11 +58,22 @@ STATISTIC(NumCombined , "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor , "Number of factorizations"); +STATISTIC(NumReassoc , "Number of reassociations"); +// Initialization Routines +void llvm::initializeInstCombine(PassRegistry &Registry) { + initializeInstCombinerPass(Registry); +} + +void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { + initializeInstCombine(*unwrap(R)); +} char InstCombiner::ID = 0; INITIALIZE_PASS(InstCombiner, "instcombine", - "Combine redundant instructions", false, false); + "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreservedID(LCSSAID); @@ -97,53 +109,326 @@ bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { } -// SimplifyCommutative - This performs a few simplifications for commutative -// operators: +/// SimplifyAssociativeOrCommutative - This performs a few simplifications for +/// operators which are associative or commutative: +// +// Commutative operators: // // 1. Order operands such that they are listed from right (least complex) to // left (most complex). This puts constants before unary operators before // binary operators. // -// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) -// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) +// Associative operators: +// +// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. +// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. +// +// Associative and commutative operators: +// +// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. +// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. +// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" +// if C1 and C2 are constants. 
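To see transform 2 from the list above concretely: in "(a + 3) + 5" the inner "3 + 5" simplifies, so the whole expression reassociates to "a + 8". A toy model of that check (invented types, not InstCombine's code; the real SimplifyBinOp returns null when nothing folds):

    #include <cstdio>
    #include <optional>

    struct Expr { bool isConst; long val; };  // a leaf: a constant, or the variable "a"

    // Fold "L + R" only when both sides are constant, mirroring SimplifyBinOp.
    std::optional<long> simplifyAdd(const Expr &L, const Expr &R) {
      if (L.isConst && R.isConst) return L.val + R.val;
      return std::nullopt;
    }

    int main() {
      Expr A{false, 0}, B{true, 3}, C{true, 5};    // "(A op B) op C"
      if (auto V = simplifyAdd(B, C))              // does "B op C" simplify?
        printf("(a + 3) + 5 ==> a + %ld\n", *V);   // form "A op V": a + 8
    }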
// -bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { +bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { + Instruction::BinaryOps Opcode = I.getOpcode(); bool Changed = false; - if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) - Changed = !I.swapOperands(); - if (!I.isAssociative()) return Changed; - - Instruction::BinaryOps Opcode = I.getOpcode(); - if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) - if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { - if (isa<Constant>(I.getOperand(1))) { - Constant *Folded = ConstantExpr::get(I.getOpcode(), - cast<Constant>(I.getOperand(1)), - cast<Constant>(Op->getOperand(1))); - I.setOperand(0, Op->getOperand(0)); - I.setOperand(1, Folded); - return true; + do { + // Order operands such that they are listed from right (least complex) to + // left (most complex). This puts constants before unary operators before + // binary operators. + if (I.isCommutative() && getComplexity(I.getOperand(0)) < + getComplexity(I.getOperand(1))) + Changed = !I.swapOperands(); + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0)); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)); + + if (I.isAssociative()) { + // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = I.getOperand(1); + + // Does "B op C" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) { + // It simplifies to V. Form "A op V". + I.setOperand(0, A); + I.setOperand(1, V); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } } - - if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1))) - if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && - Op->hasOneUse() && Op1->hasOneUse()) { - Constant *C1 = cast<Constant>(Op->getOperand(1)); - Constant *C2 = cast<Constant>(Op1->getOperand(1)); - - // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); - Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), - Op1->getOperand(0), - Op1->getName(), &I); - Worklist.Add(New); - I.setOperand(0, New); - I.setOperand(1, Folded); - return true; + + // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = I.getOperand(0); + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "A op B" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) { + // It simplifies to V. Form "V op C". + I.setOperand(0, V); + I.setOperand(1, C); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; } + } } - return Changed; + + if (I.isAssociative() && I.isCommutative()) { + // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = I.getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + // It simplifies to V. Form "V op B". 
+ I.setOperand(0, V); + I.setOperand(1, B); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = I.getOperand(0); + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + // It simplifies to V. Form "B op V". + I.setOperand(0, B); + I.setOperand(1, V); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" + // if C1 and C2 are constants. + if (Op0 && Op1 && + Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && + isa<Constant>(Op0->getOperand(1)) && + isa<Constant>(Op1->getOperand(1)) && + Op0->hasOneUse() && Op1->hasOneUse()) { + Value *A = Op0->getOperand(0); + Constant *C1 = cast<Constant>(Op0->getOperand(1)); + Value *B = Op1->getOperand(0); + Constant *C2 = cast<Constant>(Op1->getOperand(1)); + + Constant *Folded = ConstantExpr::get(Opcode, C1, C2); + Instruction *New = BinaryOperator::Create(Opcode, A, B, Op1->getName(), + &I); + Worklist.Add(New); + I.setOperand(0, New); + I.setOperand(1, Folded); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + continue; + } + } + + // No further simplifications. + return Changed; + } while (1); +} + +/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to +/// "(X LOp Y) ROp (X LOp Z)". +static bool LeftDistributesOverRight(Instruction::BinaryOps LOp, + Instruction::BinaryOps ROp) { + switch (LOp) { + default: + return false; + + case Instruction::And: + // And distributes over Or and Xor. + switch (ROp) { + default: + return false; + case Instruction::Or: + case Instruction::Xor: + return true; + } + + case Instruction::Mul: + // Multiplication distributes over addition and subtraction. + switch (ROp) { + default: + return false; + case Instruction::Add: + case Instruction::Sub: + return true; + } + + case Instruction::Or: + // Or distributes over And. + switch (ROp) { + default: + return false; + case Instruction::And: + return true; + } + } +} + +/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to +/// "(X ROp Z) LOp (Y ROp Z)". +static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, + Instruction::BinaryOps ROp) { + if (Instruction::isCommutative(ROp)) + return LeftDistributesOverRight(ROp, LOp); + // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", + // but this requires knowing that the addition does not overflow and other + // such subtleties. + return false; +} + +/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations +/// which some other binary operation distributes over either by factorizing +/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this +/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is +/// a win). Returns the simplified value, or null if it didn't simplify. 
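The two distributive directions that the function below exploits can be sanity-checked on plain integers; a self-contained spot check (illustrative only):

    #include <cassert>

    int main() {
      unsigned A = 7, B = 12, C = 5;
      // Factorization direction: (A*B) + (A*C) == A*(B+C), mul over add.
      assert(A * B + A * C == A * (B + C));
      // Expansion direction: A & (B | C) == (A & B) | (A & C), and over or.
      assert((A & (B | C)) == ((A & B) | (A & C)));
      return 0;
    }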
+Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op + + // Factorization. + if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); + Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); + Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + + // Does "X op' Y" always equal "Y op' X"? + bool InnerCommutative = Instruction::isCommutative(InnerOpcode); + + // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? + if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (InnerCommutative && A == D)) { + if (A != C) + std::swap(C, D); + // Consider forming "A op' (B op D)". + // If "B op D" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD); + // If "B op D" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && Op0->hasOneUse() && Op1->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); + if (V) { + ++NumFactor; + V = Builder->CreateBinOp(InnerOpcode, A, V); + V->takeName(&I); + return V; + } + } + + // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? + if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD); + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && Op0->hasOneUse() && Op1->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); + if (V) { + ++NumFactor; + V = Builder->CreateBinOp(InnerOpcode, V, B); + V->takeName(&I); + return V; + } + } + } + + // Expansion. + if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { + // The instruction has the form "(A op' B) op C". See if expanding it out + // to "(A op C) op' (B op C)" results in simplifications. + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; + Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + + // Do "A op C" and "B op C" both simplify? + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) { + // They do! Return "L op' R". + ++NumExpand; + // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. + if ((L == A && R == B) || + (Instruction::isCommutative(InnerOpcode) && L == B && R == A)) + return Op0; + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + return V; + // Otherwise, create a new instruction. 
+ C = Builder->CreateBinOp(InnerOpcode, L, R); + C->takeName(&I); + return C; + } + } + + if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) { + // The instruction has the form "A op (B op' C)". See if expanding it out + // to "(A op B) op' (A op C)" results in simplifications. + Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); + Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' + + // Do "A op B" and "A op C" both simplify? + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) { + // They do! Return "L op' R". + ++NumExpand; + // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. + if ((L == B && R == C) || + (Instruction::isCommutative(InnerOpcode) && L == C && R == B)) + return Op1; + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + return V; + // Otherwise, create a new instruction. + A = Builder->CreateBinOp(InnerOpcode, L, R); + A->takeName(&I); + return A; + } + } + + return 0; } // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction @@ -185,8 +470,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const { static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner *IC) { - if (CastInst *CI = dyn_cast<CastInst>(&I)) + if (CastInst *CI = dyn_cast<CastInst>(&I)) { return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); + } // Figure out if the constant is the left or the right argument. bool ConstIsRHS = isa<Constant>(I.getOperand(1)); @@ -228,11 +514,24 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // Bool selects with constant operands can be folded to logical ops. if (SI->getType()->isIntegerTy(1)) return 0; + // If it's a bitcast involving vectors, make sure it has the same number of + // elements on both sides. + if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) { + const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy()); + const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); + + // Verify that either both or neither are vectors. + if ((SrcTy == NULL) != (DestTy == NULL)) return 0; + // If vectors, verify that they have the same number of elements. + if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) + return 0; + } + Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); - return SelectInst::Create(SI->getCondition(), SelectTrueVal, - SelectFalseVal); + return SelectInst::Create(SI->getCondition(), + SelectTrueVal, SelectFalseVal); } return 0; } @@ -242,20 +541,25 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { /// has a PHI node as operand #0, see if we can fold the instruction into the /// PHI (which is only possible if all operands to the PHI are constants). /// -/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms -/// that would normally be unprofitable because they strongly encourage jump -/// threading. -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, - bool AllowAggressive) { - AllowAggressive = false; +Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *PN = cast<PHINode>(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); - if (NumPHIValues == 0 || - // We normally only transform phis with a single use, unless we're trying - // hard to make jump threading happen. 
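The shape of the transformation FoldOpIntoPhi performs below is easy to model outside the compiler: an operation with one constant operand applied to a phi of constants becomes a phi of folded constants. A minimal sketch with invented values (the real code builds a new PHINode and constant-folds each incoming value):

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<long> incoming = {2, 7};  // constant incoming values of the phi
      std::vector<long> folded;
      for (long v : incoming)
        folded.push_back(v + 10);           // fold "phi + 10" into each operand
      for (long v : folded)
        printf("%ld ", v);                  // prints "12 17"
      printf("\n");
    }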
- (!PN->hasOneUse() && !AllowAggressive)) + if (NumPHIValues == 0) return 0; + // We normally only transform phis with a single use. However, if a PHI has + // multiple uses and they are all the same operation, we can fold *all* of the + // uses into the PHI. + if (!PN->hasOneUse()) { + // Walk the use list for the instruction, comparing them to I. + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User != &I && !I.isIdenticalTo(User)) + return 0; + } + // Otherwise, we can replace *all* users with the new PHI we form. + } // Check to see if all of the operands of the PHI are simple constants // (constantint/constantfp/undef). If there is one non-constant value, // bail out. We don't do arbitrary constant expressions here because moving // their computation can be expensive without a cost model. BasicBlock *NonConstBB = 0; - for (unsigned i = 0; i != NumPHIValues; ++i) - if (!isa<Constant>(PN->getIncomingValue(i)) || - isa<ConstantExpr>(PN->getIncomingValue(i))) { - if (NonConstBB) return 0; // More than one non-const value. - if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi. - NonConstBB = PN->getIncomingBlock(i); - - // If the incoming non-constant value is in I's block, we have an infinite - // loop. - if (NonConstBB == I.getParent()) + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InVal = PN->getIncomingValue(i); + if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal)) + continue; + + if (isa<PHINode>(InVal)) return 0; // Itself a phi. + if (NonConstBB) return 0; // More than one non-const value. + + NonConstBB = PN->getIncomingBlock(i); + + // If the InVal is an invoke at the end of the pred block, then we can't + // insert a computation after it without breaking the edge. + if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) + if (II->getParent() == NonConstBB) return 0; - } + + // If the incoming non-constant value is in I's block, we will remove one + // instruction, but insert another equivalent one, leading to infinite + // instcombine. + if (NonConstBB == I.getParent()) + return 0; + } // If there is exactly one non-constant value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation on some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB != 0 && !AllowAggressive) { + if (NonConstBB != 0) { BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); if (!BI || !BI->isUnconditional()) return 0; } @@ -290,7 +604,12 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, NewPN->reserveOperandSpace(PN->getNumOperands()/2); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); - + + // If we are going to have to insert a new computation, do so right before the + // predecessor's terminator. + if (NonConstBB) + Builder->SetInsertPoint(NonConstBB->getTerminator()); + // Next, add all of the operands to the PHI.
if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { // We only currently try to fold the condition of a select when it is a phi, @@ -303,42 +622,36 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); Value *InV = 0; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, - FalseVInPred, - "phitmp", NonConstBB->getTerminator()); - Worklist.Add(cast<Instruction>(InV)); - } + else + InV = Builder->CreateSelect(PN->getIncomingValue(i), + TrueVInPred, FalseVInPred, "phitmp"); NewPN->addIncoming(InV, ThisBB); } + } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) { + Constant *C = cast<Constant>(I.getOperand(1)); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV = 0; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); + else if (isa<ICmpInst>(CI)) + InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); + else + InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } } else if (I.getNumOperands() == 2) { Constant *C = cast<Constant>(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV = 0; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { - if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); - else - InV = ConstantExpr::get(I.getOpcode(), InC, C); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) - InV = BinaryOperator::Create(BO->getOpcode(), - PN->getIncomingValue(i), C, "phitmp", - NonConstBB->getTerminator()); - else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), - PN->getIncomingValue(i), C, "phitmp", - NonConstBB->getTerminator()); - else - llvm_unreachable("Unknown binop!"); - - Worklist.Add(cast<Instruction>(InV)); - } + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::get(I.getOpcode(), InC, C); + else + InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(), + PN->getIncomingValue(i), C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { @@ -346,18 +659,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, const Type *RetTy = CI->getType(); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), - I.getType(), "phitmp", - NonConstBB->getTerminator()); - Worklist.Add(cast<Instruction>(InV)); - } + else + InV = Builder->CreateCast(CI->getOpcode(), + PN->getIncomingValue(i), I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ) { + Instruction *User = cast<Instruction>(*UI++); + if 
(User == &I) continue; + ReplaceInstUsesWith(*User, NewPN); + EraseInstFromFunction(*User); + } return ReplaceInstUsesWith(I, NewPN); } @@ -432,28 +749,35 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *PtrOp = GEP.getOperand(0); - if (isa<UndefValue>(GEP.getOperand(0))) - return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); - - // Eliminate unneeded casts for indices. + // Eliminate unneeded casts for indices, and replace indices which displace + // by multiples of a zero size type with zero. if (TD) { bool MadeChange = false; - unsigned PtrSize = TD->getPointerSizeInBits(); - + const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; ++I, ++GTI) { - if (!isa<SequentialType>(*GTI)) continue; - - // If we are using a wider index than needed for this platform, shrink it - // to what we need. If narrower, sign-extend it to what we need. This - // explicit cast can make subsequent optimizations more obvious. - unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth(); - if (OpBits == PtrSize) - continue; - - *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); - MadeChange = true; + // Skip indices into struct types. + const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); + if (!SeqTy) continue; + + // If the element type has zero size then any index over it is equivalent + // to an index of zero, so replace it with zero if it is not zero already. + if (SeqTy->getElementType()->isSized() && + TD->getTypeAllocSize(SeqTy->getElementType()) == 0) + if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) { + *I = Constant::getNullValue(IntPtrTy); + MadeChange = true; + } + + if ((*I)->getType() != IntPtrTy) { + // If we are using a wider index than needed for this platform, shrink + // it to what we need. If narrower, sign-extend it to what we need. + // This explicit cast can make subsequent optimizations more obvious. + *I = Builder->CreateIntCast(*I, IntPtrTy, true); + MadeChange = true; + } } if (MadeChange) return &GEP; } @@ -940,6 +1264,14 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { EraseInstFromFunction(*II); return BinaryOperator::CreateAdd(LHS, RHS); } + + // If the normal result of the add is dead, and the RHS is a constant, + // we can transform this into a range comparison. + // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 + if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow) + if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1))) + return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0), + ConstantExpr::getNot(CI)); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: @@ -964,10 +1296,37 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { } } } - // Can't simplify extracts from other values. Note that nested extracts are - // already simplified implicitely by the above (extract ( extract (insert) ) + if (LoadInst *L = dyn_cast<LoadInst>(Agg)) + // If the (non-volatile) load only has one use, we can rewrite this to a + // load from a GEP. This reduces the size of the load. + // FIXME: If a load is used only by extractvalue instructions then this + // could be done regardless of having multiple uses. + if (!L->isVolatile() && L->hasOneUse()) { + // extractvalue has integer indices, getelementptr has Value*s. Convert. 
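The uadd.with.overflow rewrite a few hunks up relies on an identity worth spelling out: for unsigned a and constant c, "a + c wraps" holds exactly when "a > ~c", because ~c equals UINT_MAX - c. A standalone check of the "-4 --> icmp ugt a, 3" example from the comment:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t c = (uint32_t)-4;              // the constant -4
      for (uint64_t a = 0; a <= 8; ++a) {
        bool wraps = a + (uint64_t)c > UINT32_MAX;  // exact unsigned overflow
        bool icmp  = (uint32_t)a > (uint32_t)~c;    // icmp ugt a, 3
        assert(wraps == icmp);
      }
      return 0;
    }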
+ SmallVector<Value*, 4> Indices; + // Prefix an i32 0 since we need the first element. + Indices.push_back(Builder->getInt32(0)); + for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); + I != E; ++I) + Indices.push_back(Builder->getInt32(*I)); + + // We need to insert these at the location of the old load, not at that of + // the extractvalue. + Builder->SetInsertPoint(L->getParent(), L); + Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), + Indices.begin(), Indices.end()); + // Returning the load directly will cause the main loop to insert it in + // the wrong spot, so use ReplaceInstUsesWith(). + return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); + } + // We could simplify extracts from other values. Note that nested extracts may + // already be simplified implicitly by the above: extract (extract (insert) ) // will be translated into extract ( insert ( extract ) ) first and then just - // the value inserted, if appropriate). + // the value inserted, if appropriate. Similarly for extracts from single-use + // loads: extract (extract (load)) will be translated to extract (load (gep)) + // and if again single-use then via load (gep (gep)) to load (gep). + // However, double extracts from e.g. function arguments or return values + // aren't handled yet. return 0; } @@ -1023,10 +1382,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; Worklist.push_back(BB); - - std::vector<Instruction*> InstrsForInstCombineWorklist; - InstrsForInstCombineWorklist.reserve(128); + SmallVector<Instruction*, 128> InstrsForInstCombineWorklist; SmallPtrSet<ConstantExpr*, 64> FoldedConstants; do { @@ -1231,6 +1588,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { DEBUG(errs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); + Result->setDebugLoc(I->getDebugLoc()); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp index a77d70c..1d31fcc 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -17,6 +17,7 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-edge-profiling" + #include "ProfilingUtils.h" #include "llvm/Module.h" #include "llvm/Pass.h" @@ -34,7 +35,9 @@ namespace { bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid - EdgeProfiler() : ModulePass(ID) {} + EdgeProfiler() : ModulePass(ID) { + initializeEdgeProfilerPass(*PassRegistry::getPassRegistry()); + } virtual const char *getPassName() const { return "Edge Profiler"; @@ -44,7 +47,7 @@ namespace { char EdgeProfiler::ID = 0; INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling", - "Insert instrumentation for edge profiling", false, false); + "Insert instrumentation for edge profiling", false, false) ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } @@ -98,7 +101,7 @@ bool EdgeProfiler::runOnModule(Module &M) { // otherwise insert it in the successor block. 
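IncrementCounterInBlock, used in the edge-profiling hunk below, is defined in ProfilingUtils.cpp (not shown in this diff); it boils down to a load/add/store on one slot of the counter array. Roughly, in standalone form (invented names, purely a model of the emitted IR):

    #include <cstdint>
    #include <cstdio>

    static uint64_t Counters[8];                // stands in for the global array

    inline void incrementCounter(unsigned i) {  // what each instrumented block does
      uint64_t v = Counters[i];                 // load counter i
      Counters[i] = v + 1;                      // add 1 and store it back
    }

    int main() {
      incrementCounter(3);
      incrementCounter(3);
      printf("edge 3 executed %llu times\n", (unsigned long long)Counters[3]);
    }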
if (TI->getNumSuccessors() == 1) { // Insert counter at the start of the block - IncrementCounterInBlock(BB, i++, Counters); + IncrementCounterInBlock(BB, i++, Counters, false); } else { // Insert counter at the start of the block IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp new file mode 100644 index 0000000..96ed4fa --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -0,0 +1,32 @@ +//===-- Instrumentation.cpp - Instrumentation Infrastructure --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the common initialization infrastructure for the +// Instrumentation library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeInstrumentation - Initialize all passes in the Instrumentation +/// library. +void llvm::initializeInstrumentation(PassRegistry &Registry) { + initializeEdgeProfilerPass(Registry); + initializeOptimalEdgeProfilerPass(Registry); + initializePathProfilerPass(Registry); +} + +/// LLVMInitializeInstrumentation - C binding for +/// initializeInstrumentation. +void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) { + initializeInstrumentation(*unwrap(R)); +} diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 8eec987..c85a1a9 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -36,7 +36,9 @@ namespace { bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid - OptimalEdgeProfiler() : ModulePass(ID) {} + OptimalEdgeProfiler() : ModulePass(ID) { + initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(ProfileEstimatorPassID); @@ -50,9 +52,14 @@ namespace { } char OptimalEdgeProfiler::ID = 0; -INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling", +INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling", + "Insert optimal instrumentation for edge profiling", + false, false) +INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass) +INITIALIZE_AG_DEPENDENCY(ProfileInfo) +INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling", "Insert optimal instrumentation for edge profiling", - false, false); + false, false) ModulePass *llvm::createOptimalEdgeProfilerPass() { return new OptimalEdgeProfiler(); @@ -125,11 +132,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // Calculate a Maximum Spanning Tree with the edge weights determined by // ProfileEstimator. ProfileEstimator also assigns weights to the virtual // edges (0,entry) and (BB,0) (for blocks with no successors) and these - // edges also participate in the maximum spanning tree calculation. + // edges also participate in the maximum spanning tree calculation.
// The third parameter of MaximumSpanningTree() has the effect that not the // actual MST is returned but the edges _not_ in the MST. - ProfileInfo::EdgeWeights ECs = + ProfileInfo::EdgeWeights ECs = getAnalysis<ProfileInfo>(*F).getEdgeWeights(F); std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end()); MaximumSpanningTree<BasicBlock> MST (EdgeVector); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp new file mode 100644 index 0000000..6449b39 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -0,0 +1,1423 @@ +//===- PathProfiling.cpp - Inserts counters for path profiling ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass instruments functions for Ball-Larus path profiling. Ball-Larus +// profiling converts the CFG into a DAG by replacing backedges with edges +// from entry to the start block and from the end block to exit. The paths +// along the new DAG are enumerated, i.e. each path is given a path number. +// Edges are instrumented to increment the path number register, such that the +// path number register will equal the path number of the path taken at the +// exit. +// +// This file defines classes for building a CFG for use with different stages +// in the Ball-Larus path profiling instrumentation [Ball96]. The +// requirements are formatting the llvm CFG into the Ball-Larus DAG, path +// numbering, finding a spanning tree, moving increments from the spanning +// tree to chords. +// +// Terms: +// DAG - Directed Acyclic Graph. +// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges +// removed in the following manner. For every backedge +// v->w, insert edge ENTRY->w and edge v->EXIT. +// Path Number - The number corresponding to a specific path through a +// Ball-Larus DAG. +// Spanning Tree - A subgraph, S, is a spanning tree if S covers all +// vertices and is a tree. +// Chord - An edge not in the spanning tree. +// +// [Ball96] +// T. Ball and J. R. Larus. "Efficient Path Profiling." +// International Symposium on Microarchitecture, pages 46-57, 1996. +// http://portal.acm.org/citation.cfm?id=243857 +// +// [Ball94] +// Thomas Ball. "Efficiently Counting Program Events with Support for +// On-line queries." +// ACM Transactions on Programming Languages and Systems, Vol 16, No 5, +// September 1994, Pages 1399-1410.
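The numbering scheme described above can be reproduced in a few lines for a concrete DAG. In this sketch (invented structures, not the classes below) a diamond CFG gets NumPaths computed in reverse topological order, and each edge receives the increment that makes the running path-number sum unique per path, as in [Ball96]:

    #include <cstdio>
    #include <vector>

    int main() {
      // Nodes: 0=entry, 1=then, 2=else, 3=exit.
      struct Edge { int src, dst, inc; };
      std::vector<Edge> edges = {{0,1,0}, {0,2,0}, {1,3,0}, {2,3,0}};
      int numPaths[4] = {0, 0, 0, 1};            // NumPaths(exit) = 1

      for (int v = 2; v >= 0; --v)               // reverse topological order
        for (Edge &e : edges)
          if (e.src == v) {
            e.inc = numPaths[v];                 // paths counted at v so far
            numPaths[v] += numPaths[e.dst];
          }

      // Two paths: entry->1->3 sums to 0, entry->2->3 sums to 1.
      printf("paths from entry: %d\n", numPaths[0]);
      for (const Edge &e : edges)
        printf("edge %d->%d increment %d\n", e.src, e.dst, e.inc);
    }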
+//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "insert-path-profiling" + +#include "llvm/DerivedTypes.h" +#include "ProfilingUtils.h" +#include "llvm/Analysis/PathNumbering.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TypeBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include <map> +#include <vector> + +#define HASH_THRESHHOLD 100000 + +using namespace llvm; + +namespace { +class BLInstrumentationNode; +class BLInstrumentationEdge; +class BLInstrumentationDag; + +// --------------------------------------------------------------------------- +// BLInstrumentationNode extends BallLarusNode with members used by the +// instrumentation algorithms. +// --------------------------------------------------------------------------- +class BLInstrumentationNode : public BallLarusNode { +public: + // Creates a new BLInstrumentationNode from a BasicBlock. + BLInstrumentationNode(BasicBlock* BB); + + // Gets/sets the Value corresponding to the pathNumber register, + // constant or phinode. Used by the instrumentation code to remember + // path number Values. + Value* getStartingPathNumber(); + void setStartingPathNumber(Value* pathNumber); + + Value* getEndingPathNumber(); + void setEndingPathNumber(Value* pathNumber); + + // Get/set the PHINode Instruction for this node. + PHINode* getPathPHI(); + void setPathPHI(PHINode* pathPHI); + +private: + + Value* _startingPathNumber; // The Value for the current pathNumber. + Value* _endingPathNumber; // The Value for the current pathNumber. + PHINode* _pathPHI; // The PHINode for current pathNumber. +}; + +// -------------------------------------------------------------------------- +// BLInstrumentationEdge extends BallLarusEdge with data about the +// instrumentation that will end up on each edge. +// -------------------------------------------------------------------------- +class BLInstrumentationEdge : public BallLarusEdge { +public: + BLInstrumentationEdge(BLInstrumentationNode* source, + BLInstrumentationNode* target); + + // Sets the target node of this edge. Required to split edges. + void setTarget(BallLarusNode* node); + + // Get/set whether edge is in the spanning tree. + bool isInSpanningTree() const; + void setIsInSpanningTree(bool isInSpanningTree); + + // Get/set whether this edge will be instrumented with a path number + // initialization. + bool isInitialization() const; + void setIsInitialization(bool isInitialization); + + // Get/set whether this edge will be instrumented with a path counter + // increment. Notice this is incrementing the path counter + // corresponding to the path number register. The path number + // increment is determined by getIncrement(). + bool isCounterIncrement() const; + void setIsCounterIncrement(bool isCounterIncrement); + + // Get/set the path number increment that this edge will be instrumented + // with. This is distinct from the path counter increment and the + // weight. The counter increment counts the number of executions of + // some path, whereas the path number keeps track of which path number + // the program is on.
+ long getIncrement() const; + void setIncrement(long increment); + + // Get/set whether the edge has been instrumented. + bool hasInstrumentation(); + void setHasInstrumentation(bool hasInstrumentation); + + // Returns the successor number of this edge in the source. + unsigned getSuccessorNumber(); + +private: + // The increment that the code will be instrumented with. + long long _increment; + + // Whether this edge is in the spanning tree. + bool _isInSpanningTree; + + // Whether this edge is an initialization of the path number. + bool _isInitialization; + + // Whether this edge is a path counter increment. + bool _isCounterIncrement; + + // Whether this edge has been instrumented. + bool _hasInstrumentation; +}; + +// --------------------------------------------------------------------------- +// BLInstrumentationDag extends BallLarusDag with algorithms that +// determine where instrumentation should be placed. +// --------------------------------------------------------------------------- +class BLInstrumentationDag : public BallLarusDag { +public: + BLInstrumentationDag(Function &F); + + // Returns the Exit->Root edge. This edge is required for creating + // directed cycles in the algorithm for moving instrumentation off of + // the spanning tree + BallLarusEdge* getExitRootEdge(); + + // Returns an array of phony edges which mark those nodes + // with function calls + BLEdgeVector getCallPhonyEdges(); + + // Gets/sets the path counter array + GlobalVariable* getCounterArray(); + void setCounterArray(GlobalVariable* c); + + // Calculates the increments for the chords, thereby removing + // instrumentation from the spanning tree edges. Implementation is based + // on the algorithm in Figure 4 of [Ball94] + void calculateChordIncrements(); + + // Updates the state when an edge has been split + void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock); + + // Calculates a spanning tree of the DAG ignoring cycles. Whichever + // edges are in the spanning tree will not be instrumented, but this + // implementation does not try to minimize the instrumentation overhead + // by trying to find hot edges. + void calculateSpanningTree(); + + // Pushes initialization further down in order to group the first + // increment and initialization. + void pushInitialization(); + + // Pushes the path counter increments up in order to group the last path + // number increment. + void pushCounters(); + + // Removes phony edges from the successor list of the source, and the + // predecessor list of the target. + void unlinkPhony(); + + // Generate dot graph for the function + void generateDotGraph(); + +protected: + // BLInstrumentationDag creates BLInstrumentationNode objects in this + // method overriding the creation of BallLarusNode objects. + // + // Allows subclasses to determine which type of Node is created. + // Override this method to produce subclasses of BallLarusNode if + // necessary. + virtual BallLarusNode* createNode(BasicBlock* BB); + + // BLInstrumentationDag creates BLInstrumentationEdges. + // + // Allows subclasses to determine which type of Edge is created. + // Override this method to produce subclasses of BallLarusEdge if + // necessary. Parameters source and target will have been created by + // createNode and can be cast to the subclass of BallLarusNode* + // returned by createNode. + virtual BallLarusEdge* createEdge( + BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber); + +private: + BLEdgeVector _treeEdges; // All edges in the spanning tree.
+ BLEdgeVector _chordEdges; // All edges not in the spanning tree. + GlobalVariable* _counterArray; // Array to store path counters + + // Removes the edge from the appropriate predecessor and successor lists. + void unlinkEdge(BallLarusEdge* edge); + + // Makes an edge part of the spanning tree. + void makeEdgeSpanning(BLInstrumentationEdge* edge); + + // Pushes initialization and calls itself recursively. + void pushInitializationFromEdge(BLInstrumentationEdge* edge); + + // Pushes path counter increments up recursively. + void pushCountersFromEdge(BLInstrumentationEdge* edge); + + // Depth first algorithm for determining the chord increments. + void calculateChordIncrementsDfs( + long weight, BallLarusNode* v, BallLarusEdge* e); + + // Determines the relative direction of two edges. + int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f); +}; + +// --------------------------------------------------------------------------- +// PathProfiler is a module pass which instruments path profiling instructions +// --------------------------------------------------------------------------- +class PathProfiler : public ModulePass { +private: + // Current context for multi threading support. + LLVMContext* Context; + + // Which function are we currently instrumenting + unsigned currentFunctionNumber; + + // The function prototype in the profiling runtime for incrementing a + // single path counter in a hash table. + Constant* llvmIncrementHashFunction; + Constant* llvmDecrementHashFunction; + + // Instruments each function with path profiling. 'main' is instrumented + // with code to save the profile to disk. + bool runOnModule(Module &M); + + // Analyzes the function for Ball-Larus path profiling, and inserts code. + void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M); + + // Creates an increment constant representing incr. + ConstantInt* createIncrementConstant(long incr, int bitsize); + + // Creates an increment constant representing the value in + // edge->getIncrement(). + ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge); + + // Finds the insertion point after pathNumber in block. PathNumber may + // be NULL. + BasicBlock::iterator getInsertionPoint( + BasicBlock* block, Value* pathNumber); + + // Inserts source's pathNumber Value* into target. Target may or may not + // have multiple predecessors, and may or may not have its phiNode + // initialized. + void pushValueIntoNode( + BLInstrumentationNode* source, BLInstrumentationNode* target); + + // Inserts source's pathNumber Value* into the appropriate slot of + // target's phiNode. + void pushValueIntoPHI( + BLInstrumentationNode* target, BLInstrumentationNode* source); + + // The Value* in node, oldVal, is updated with a Value* corresponding to + // oldVal + addition. + void insertNumberIncrement(BLInstrumentationNode* node, Value* addition, + bool atBeginning); + + // Creates a counter increment in the given node. The Value* in node is + // taken as the index into a hash table. + void insertCounterIncrement( + Value* incValue, + BasicBlock::iterator insertPoint, + BLInstrumentationDag* dag, + bool increment = true); + + // A PHINode is created in the node, and its values initialized to -1U. + void preparePHI(BLInstrumentationNode* node); + + // Inserts instrumentation for the given edge + // + // Pre: The edge's source node has pathNumber set if the edge is a non-zero + // path number increment.
+ // + // Post: Edge's target node has a pathNumber set to the path number Value + // corresponding to the value of the path register after edge's + // execution. + void insertInstrumentationStartingAt( + BLInstrumentationEdge* edge, + BLInstrumentationDag* dag); + + // If this edge is a critical edge, then inserts a node at this edge. + // This edge becomes the first edge, and a new BallLarusEdge is created. + bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag); + + // Inserts instrumentation according to the marked edges in dag. Phony + // edges must be unlinked from the DAG, but accessible from the + // backedges. Dag must have initializations, path number increments, and + // counter increments present. + // + // Counter storage is created here. + void insertInstrumentation( BLInstrumentationDag& dag, Module &M); + +public: + static char ID; // Pass identification, replacement for typeid + PathProfiler() : ModulePass(ID) { + initializePathProfilerPass(*PassRegistry::getPassRegistry()); + } + + virtual const char *getPassName() const { + return "Path Profiler"; + } +}; +} // end anonymous namespace + +// Should we print the dot-graphs +static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden, + cl::desc("Output the path profiling DAG for each function.")); + +// Register the path profiler as a pass +char PathProfiler::ID = 0; +INITIALIZE_PASS(PathProfiler, "insert-path-profiling", + "Insert instrumentation for Ball-Larus path profiling", + false, false) + +ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); } + +namespace llvm { + class PathProfilingFunctionTable {}; + + // Type for global array storing references to hashes or arrays + template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable, + xcompile> { + public: + static const StructType *get(LLVMContext& C) { + return( StructType::get( + C, TypeBuilder<types::i<32>, xcompile>::get(C), // type + TypeBuilder<types::i<32>, xcompile>::get(C), // array size + TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr + NULL)); + } + }; + + typedef TypeBuilder<PathProfilingFunctionTable, true> + ftEntryTypeBuilder; + + // BallLarusEdge << operator overloading + raw_ostream& operator<<(raw_ostream& os, + const BLInstrumentationEdge& edge) { + os << "[" << edge.getSource()->getName() << " -> " + << edge.getTarget()->getName() << "] init: " + << (edge.isInitialization() ? "yes" : "no") + << " incr:" << edge.getIncrement() << " cinc: " + << (edge.isCounterIncrement() ? "yes" : "no"); + return(os); + } +} + +// Creates a new BLInstrumentationNode from a BasicBlock. +BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) : + BallLarusNode(BB), + _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {} + +// Constructor for BLInstrumentationEdge. +BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source, + BLInstrumentationNode* target) + : BallLarusEdge(source, target, 0), + _increment(0), _isInSpanningTree(false), _isInitialization(false), + _isCounterIncrement(false), _hasInstrumentation(false) {} + +// Sets the target node of this edge. Required to split edges. +void BLInstrumentationEdge::setTarget(BallLarusNode* node) { + _target = node; +} + +// Returns whether this edge is in the spanning tree. +bool BLInstrumentationEdge::isInSpanningTree() const { + return(_isInSpanningTree); +} + +// Sets whether this edge is in the spanning tree. 
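The TypeBuilder specialization registered above describes one entry of the function table handed to the profiling runtime; on the runtime side that presumably corresponds to a struct shaped like the following (field names invented for illustration):

    #include <cstdint>

    struct PathProfilingFunctionTableEntry {
      uint32_t type;       // kind of counter storage (array vs. hash)
      uint32_t arraySize;  // number of counters when an array is used
      uint8_t *counters;   // pointer to the array or hash table
    };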
+void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) { + _isInSpanningTree = isInSpanningTree; +} + +// Returns whether this edge will be instrumented with a path number +// initialization. +bool BLInstrumentationEdge::isInitialization() const { + return(_isInitialization); +} + +// Sets whether this edge will be instrumented with a path number +// initialization. +void BLInstrumentationEdge::setIsInitialization(bool isInitialization) { + _isInitialization = isInitialization; +} + +// Returns whether this edge will be instrumented with a path counter +// increment. Notice this is incrementing the path counter +// corresponding to the path number register. The path number +// increment is determined by getIncrement(). +bool BLInstrumentationEdge::isCounterIncrement() const { + return(_isCounterIncrement); +} + +// Sets whether this edge will be instrumented with a path counter +// increment. +void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) { + _isCounterIncrement = isCounterIncrement; +} + +// Gets the path number increment that this edge will be instrumented +// with. This is distinct from the path counter increment and the +// weight. The counter increment counts the number of executions of +// some path, whereas the path number keeps track of which path number +// the program is on. +long BLInstrumentationEdge::getIncrement() const { + return(_increment); +} + +// Set whether this edge will be instrumented with a path number +// increment. +void BLInstrumentationEdge::setIncrement(long increment) { + _increment = increment; +} + +// True iff the edge has already been instrumented. +bool BLInstrumentationEdge::hasInstrumentation() { + return(_hasInstrumentation); +} + +// Set whether this edge has been instrumented. +void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) { + _hasInstrumentation = hasInstrumentation; +} + +// Returns the successor number of this edge in the source. +unsigned BLInstrumentationEdge::getSuccessorNumber() { + BallLarusNode* sourceNode = getSource(); + BallLarusNode* targetNode = getTarget(); + BasicBlock* source = sourceNode->getBlock(); + BasicBlock* target = targetNode->getBlock(); + + if(source == NULL || target == NULL) + return(0); + + TerminatorInst* terminator = source->getTerminator(); + + unsigned i; + for(i=0; i < terminator->getNumSuccessors(); i++) { + if(terminator->getSuccessor(i) == target) + break; + } + + return(i); +} + +// BLInstrumentationDag constructor initializes a DAG for the given Function. +BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F), + _counterArray(0) { +} + +// Returns the Exit->Root edge.
+// Returns the Exit->Root edge.  This edge is required for creating
+// directed cycles in the algorithm for moving instrumentation off of
+// the spanning tree.
+BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
+  BLEdgeIterator erEdge = getExit()->succBegin();
+  return(*erEdge);
+}
+
+BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
+  BLEdgeVector callEdges;
+
+  for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+       edge != end; edge++ ) {
+    if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
+      callEdges.push_back(*edge);
+  }
+
+  return callEdges;
+}
+
+// Gets the path counter array
+GlobalVariable* BLInstrumentationDag::getCounterArray() {
+  return _counterArray;
+}
+
+void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
+  _counterArray = c;
+}
+
+// Calculates the increment for the chords, thereby removing
+// instrumentation from the spanning tree edges.  Implementation is based on
+// the algorithm in Figure 4 of [Ball94].
+void BLInstrumentationDag::calculateChordIncrements() {
+  calculateChordIncrementsDfs(0, getRoot(), NULL);
+
+  BLInstrumentationEdge* chord;
+  for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+      end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+    chord = (BLInstrumentationEdge*) *chordEdge;
+    chord->setIncrement(chord->getIncrement() + chord->getWeight());
+  }
+}
+
+// Updates the state when an edge has been split
+void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
+                                       BasicBlock* newBlock) {
+  BallLarusNode* oldTarget = formerEdge->getTarget();
+  BallLarusNode* newNode = addNode(newBlock);
+  formerEdge->setTarget(newNode);
+  newNode->addPredEdge(formerEdge);
+
+  DEBUG(dbgs() << "  Edge split: " << *formerEdge << "\n");
+
+  oldTarget->removePredEdge(formerEdge);
+  BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
+
+  if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
+      formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
+    newEdge->setType(formerEdge->getType());
+    newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
+    newEdge->setPhonyExit(formerEdge->getPhonyExit());
+    formerEdge->setType(BallLarusEdge::NORMAL);
+    formerEdge->setPhonyRoot(NULL);
+    formerEdge->setPhonyExit(NULL);
+  }
+}
+
+// Calculates a spanning tree of the DAG ignoring cycles.  Whichever
+// edges are in the spanning tree will not be instrumented, but this
+// implementation does not try to minimize the instrumentation overhead
+// by trying to find hot edges.
+void BLInstrumentationDag::calculateSpanningTree() {
+  std::stack<BallLarusNode*> dfsStack;
+
+  for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
+      nodeIt != end; nodeIt++) {
+    (*nodeIt)->setColor(BallLarusNode::WHITE);
+  }
+
+  dfsStack.push(getRoot());
+  while(dfsStack.size() > 0) {
+    BallLarusNode* node = dfsStack.top();
+    dfsStack.pop();
+
+    // Already fully visited; nothing left to do.
+    if(node->getColor() == BallLarusNode::BLACK)
+      continue;
+
+    BallLarusNode* nextNode;
+    bool forward = true;
+    BLEdgeIterator succEnd = node->succEnd();
+
+    node->setColor(BallLarusNode::BLACK);
+    // first iterate over successors then predecessors
+    for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
+        edge != predEnd; edge++) {
+      if(edge == succEnd) {
+        edge = node->predBegin();
+        forward = false;
+      }
+
+      // Ignore split edges
+      if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
+        continue;
+
+      nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
+      // The first edge to reach an undiscovered node joins the spanning
+      // tree; the node is then queued so its own edges get visited.
+      if(nextNode->getColor() == BallLarusNode::WHITE) {
+        nextNode->setColor(BallLarusNode::GRAY);
+        makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
+        dfsStack.push(nextNode);
+      }
+    }
+  }
+
+  for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+      edge != end; edge++) {
+    BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
+    // safe since createEdge is overridden
+    if(!instEdge->isInSpanningTree() && (*edge)->getType()
+       != BallLarusEdge::SPLITEDGE)
+      _chordEdges.push_back(instEdge);
+  }
+}
+
+// Pushes initialization further down in order to group the first
+// increment and initialization.
+void BLInstrumentationDag::pushInitialization() {
+  BLInstrumentationEdge* exitRootEdge =
+    (BLInstrumentationEdge*) getExitRootEdge();
+  exitRootEdge->setIsInitialization(true);
+  pushInitializationFromEdge(exitRootEdge);
+}
+
+// Pushes the path counter increments up in order to group the last path
+// number increment.
+void BLInstrumentationDag::pushCounters() {
+  BLInstrumentationEdge* exitRootEdge =
+    (BLInstrumentationEdge*) getExitRootEdge();
+  exitRootEdge->setIsCounterIncrement(true);
+  pushCountersFromEdge(exitRootEdge);
+}
+
+// Removes phony edges from the successor list of the source, and the
+// predecessor list of the target.
+void BLInstrumentationDag::unlinkPhony() {
+  BallLarusEdge* edge;
+
+  for(BLEdgeIterator next = _edges.begin(),
+      end = _edges.end(); next != end; next++) {
+    edge = (*next);
+
+    if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+        edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
+        edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
+      unlinkEdge(edge);
+    }
+  }
+}
+
+// Generate a .dot graph to represent the DAG and pathNumbers
+void BLInstrumentationDag::generateDotGraph() {
+  std::string errorInfo;
+  std::string functionName = getFunction().getNameStr();
+  std::string filename = "pathdag." + functionName + ".dot";
+
+  DEBUG (dbgs() << "Writing '" << filename << "'...\n");
+  raw_fd_ostream dotFile(filename.c_str(), errorInfo);
+
+  if (!errorInfo.empty()) {
+    errs() << "Error opening '" << filename.c_str() <<"' for writing!";
+    errs() << "\n";
+    return;
+  }
+
+  dotFile << "digraph " << functionName << " {\n";
+
+  for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+       edge != end; edge++) {
+    std::string sourceName = (*edge)->getSource()->getName();
+    std::string targetName = (*edge)->getTarget()->getName();
+
+    dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
+            << targetName.c_str() << "\" ";
+
+    long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+
+    switch( (*edge)->getType() ) {
+    case BallLarusEdge::NORMAL:
+      dotFile << "[label=" << inc << "] [color=black];\n";
+      break;
+
+    case BallLarusEdge::BACKEDGE:
+      dotFile << "[color=cyan];\n";
+      break;
+
+    case BallLarusEdge::BACKEDGE_PHONY:
+      dotFile << "[label=" << inc
+              << "] [color=blue];\n";
+      break;
+
+    case BallLarusEdge::SPLITEDGE:
+      dotFile << "[color=violet];\n";
+      break;
+
+    case BallLarusEdge::SPLITEDGE_PHONY:
+      dotFile << "[label=" << inc << "] [color=red];\n";
+      break;
+
+    case BallLarusEdge::CALLEDGE_PHONY:
+      dotFile << "[label=" << inc << "] [color=green];\n";
+      break;
+    }
+  }
+
+  dotFile << "}\n";
+}
+
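// Worked example (a sketch, not part of the patch): for a diamond CFG
// A->B, A->C, B->D, C->D with root A and exit D, the numbering gives
// NumPaths(D)=1, NumPaths(B)=NumPaths(C)=1, NumPaths(A)=2, with edge values
// A->B=0, A->C=1, B->D=0, C->D=0; path A,B,D sums to 0 and A,C,D sums to 1.
// After the phony D->A edge is added, a spanning tree such as
// {A->B, B->D, A->C} leaves C->D and D->A as the chords, and
// calculateChordIncrements above assigns C->D increment 1 and D->A
// increment 0, so the path register only has to change on the single
// C->D edge.
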
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary.  The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
+  return( new BLInstrumentationNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary.  The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
+                                                BallLarusNode* target,
+                                                unsigned edgeNumber) {
+  // One can cast from BallLarusNode to BLInstrumentationNode since createNode
+  // is overridden to produce BLInstrumentationNode.
+  return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
+                                    (BLInstrumentationNode*)target) );
+}
+
+// Gets the Value corresponding to the pathNumber register, constant,
+// or phinode.  Used by the instrumentation code to remember path
+// number Values.
+Value* BLInstrumentationNode::getStartingPathNumber(){
+  return(_startingPathNumber);
+}
+
+// Sets the Value of the pathNumber.  Used by the instrumentation code.
+void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
+  DEBUG(dbgs() << "  SPN-" << getName() << " <-- "
+        << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+  _startingPathNumber = pathNumber;
+}
+
+Value* BLInstrumentationNode::getEndingPathNumber(){
+  return(_endingPathNumber);
+}
+
+void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
+  DEBUG(dbgs() << "  EPN-" << getName() << " <-- "
+        << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+  _endingPathNumber = pathNumber;
+}
+
+// Get the PHINode Instruction for this node.  Used by instrumentation
+// code.
+PHINode* BLInstrumentationNode::getPathPHI() {
+  return(_pathPHI);
+}
+
+// Set the PHINode Instruction for this node.  Used by instrumentation
+// code.
+void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
+  _pathPHI = pathPHI;
+}
+
+// Removes the edge from the appropriate predecessor and successor
+// lists.
+void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
+  if(edge == getExitRootEdge())
+    DEBUG(dbgs() << " Removing exit->root edge\n");
+
+  edge->getSource()->removeSuccEdge(edge);
+  edge->getTarget()->removePredEdge(edge);
+}
+
+// Makes an edge part of the spanning tree.
+void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
+  edge->setIsInSpanningTree(true);
+  _treeEdges.push_back(edge);
+}
+
+// Pushes initialization and calls itself recursively.
+void BLInstrumentationDag::pushInitializationFromEdge(
+  BLInstrumentationEdge* edge) {
+  BallLarusNode* target;
+
+  target = edge->getTarget();
+  if( target->getNumberPredEdges() > 1 || target == getExit() ) {
+    return;
+  } else {
+    for(BLEdgeIterator next = target->succBegin(),
+        end = target->succEnd(); next != end; next++) {
+      BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
+
+      // Skip split edges
+      if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
+        continue;
+
+      intoEdge->setIncrement(intoEdge->getIncrement() +
+                             edge->getIncrement());
+      intoEdge->setIsInitialization(true);
+      pushInitializationFromEdge(intoEdge);
+    }
+
+    edge->setIncrement(0);
+    edge->setIsInitialization(false);
+  }
+}
+
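// Intuition for the two push passes (a sketch, not part of the patch):
// pushInitializationFromEdge above migrates the path-register reset forward
// through single-predecessor chains so it merges with the first real
// increment, while pushCountersFromEdge below migrates the final counter
// bump backward through single-successor chains; both passes reduce the
// number of sites where instrumentation has to be emitted.
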
+// Pushes path counter increments up recursively.
+void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
+  BallLarusNode* source;
+
+  source = edge->getSource();
+  if(source->getNumberSuccEdges() > 1 || source == getRoot()
+     || edge->isInitialization()) {
+    return;
+  } else {
+    for(BLEdgeIterator previous = source->predBegin(),
+        end = source->predEnd(); previous != end; previous++) {
+      BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
+
+      // Skip split edges
+      if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
+        continue;
+
+      fromEdge->setIncrement(fromEdge->getIncrement() +
+                             edge->getIncrement());
+      fromEdge->setIsCounterIncrement(true);
+      pushCountersFromEdge(fromEdge);
+    }
+
+    edge->setIncrement(0);
+    edge->setIsCounterIncrement(false);
+  }
+}
+
+// Depth first algorithm for determining the chord increments.
+void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
+                                                       BallLarusNode* v,
+                                                       BallLarusEdge* e) {
+  BLInstrumentationEdge* f;
+
+  for(BLEdgeIterator treeEdge = _treeEdges.begin(),
+      end = _treeEdges.end(); treeEdge != end; treeEdge++) {
+    f = (BLInstrumentationEdge*) *treeEdge;
+    if(e != f && v == f->getTarget()) {
+      calculateChordIncrementsDfs(
+        calculateChordIncrementsDir(e,f)*(weight) +
+        f->getWeight(), f->getSource(), f);
+    }
+    if(e != f && v == f->getSource()) {
+      calculateChordIncrementsDfs(
+        calculateChordIncrementsDir(e,f)*(weight) +
+        f->getWeight(), f->getTarget(), f);
+    }
+  }
+
+  for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+      end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+    f = (BLInstrumentationEdge*) *chordEdge;
+    if(v == f->getSource() || v == f->getTarget()) {
+      f->setIncrement(f->getIncrement() +
+                      calculateChordIncrementsDir(e,f)*weight);
+    }
+  }
+}
+
+// Determines the relative direction of two edges.
+int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
+                                                      BallLarusEdge* f) {
+  if( e == NULL)
+    return(1);
+  else if(e->getSource() == f->getTarget()
+          || e->getTarget() == f->getSource())
+    return(1);
+
+  return(-1);
+}
+
+// Creates an increment constant representing incr.
+ConstantInt* PathProfiler::createIncrementConstant(long incr,
+                                                   int bitsize) {
+  return(ConstantInt::get(IntegerType::get(*Context, bitsize), incr));
+}
+
+// Creates an increment constant representing the value in
+// edge->getIncrement().
+ConstantInt* PathProfiler::createIncrementConstant(
+  BLInstrumentationEdge* edge) {
+  return(createIncrementConstant(edge->getIncrement(), 32));
+}
+
+// Finds the insertion point after pathNumber in block.  PathNumber may
+// be NULL.
+BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
+                                                     pathNumber) {
+  if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
+     || (((Instruction*)(pathNumber))->getParent()) != block) {
+    return(block->getFirstNonPHI());
+  } else {
+    Instruction* pathNumberInst = (Instruction*) (pathNumber);
+    BasicBlock::iterator insertPoint;
+    BasicBlock::iterator end = block->end();
+
+    for(insertPoint = block->begin();
+        insertPoint != end; insertPoint++) {
+      Instruction* insertInst = &(*insertPoint);
+
+      if(insertInst == pathNumberInst)
+        return(++insertPoint);
+    }
+
+    return(insertPoint);
+  }
+}
+
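// Mechanics of the -1 seeding in preparePHI below (a sketch, not part of
// the patch): every predecessor slot of the new "pathNumber" phi starts out
// as the constant -1, and pushValueIntoPHI later overwrites the slot for
// each predecessor that actually delivers a path number, so -1 only
// survives in slots that no instrumented edge ever writes.
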
+// A PHINode is created in the node, and its values initialized to -1U.
+void PathProfiler::preparePHI(BLInstrumentationNode* node) {
+  BasicBlock* block = node->getBlock();
+  BasicBlock::iterator insertPoint = block->getFirstNonPHI();
+  PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber",
+                                 insertPoint );
+  node->setPathPHI(phi);
+  node->setStartingPathNumber(phi);
+  node->setEndingPathNumber(phi);
+
+  for(pred_iterator predIt = pred_begin(node->getBlock()),
+      end = pred_end(node->getBlock()); predIt != end; predIt++) {
+    BasicBlock* pred = (*predIt);
+
+    if(pred != NULL)
+      phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
+  }
+}
+
+// Inserts source's pathNumber Value* into target.  Target may or may not
+// have multiple predecessors, and may or may not have its phiNode
+// initialized.
+void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
+                                     BLInstrumentationNode* target) {
+  if(target->getBlock() == NULL)
+    return;
+
+  if(target->getNumberPredEdges() <= 1) {
+    assert(target->getStartingPathNumber() == NULL &&
+           "Target already has path number");
+    target->setStartingPathNumber(source->getEndingPathNumber());
+    target->setEndingPathNumber(source->getEndingPathNumber());
+    DEBUG(dbgs() << "  Passing path number"
+          << (source->getEndingPathNumber() ? "" : " (null)")
+          << " value through.\n");
+  } else {
+    if(target->getPathPHI() == NULL) {
+      DEBUG(dbgs() << "  Initializing PHI node for block '"
+            << target->getName() << "'\n");
+      preparePHI(target);
+    }
+    pushValueIntoPHI(target, source);
+    DEBUG(dbgs() << "  Passing number value into PHI for block '"
+          << target->getName() << "'\n");
+  }
+}
+
+// Inserts source's pathNumber Value* into the appropriate slot of
+// target's phiNode.
+void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
+                                    BLInstrumentationNode* source) {
+  PHINode* phi = target->getPathPHI();
+  assert(phi != NULL && "  Tried to push value into node with PHI, but node"
+         " actually had no PHI.");
+  phi->removeIncomingValue(source->getBlock(), false);
+  phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
+}
+
+// The Value* in node, oldVal, is updated with a Value* corresponding to
+// oldVal + addition.
+void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
+                                         Value* addition, bool atBeginning) {
+  BasicBlock* block = node->getBlock();
+  assert(node->getStartingPathNumber() != NULL);
+  assert(node->getEndingPathNumber() != NULL);
+
+  BasicBlock::iterator insertPoint;
+
+  if( atBeginning )
+    insertPoint = block->getFirstNonPHI();
+  else
+    insertPoint = block->getTerminator();
+
+  DEBUG(errs() << "  Creating addition instruction.\n");
+  Value* newpn = BinaryOperator::Create(Instruction::Add,
+                                        node->getStartingPathNumber(),
+                                        addition, "pathNumber", insertPoint);
+
+  node->setEndingPathNumber(newpn);
+
+  if( atBeginning )
+    node->setStartingPathNumber(newpn);
+}
+
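// The array case below is equivalent to this C fragment (a sketch, not
// part of the patch; 'counters' stands in for the counter GlobalVariable,
// 'pathNum' for the computed path number, and the decrement variant used
// around calls picks -1 instead of 1):

#include <stdint.h>

static void incrementPathCounter(uint32_t* counters, uint32_t pathNum) {
  uint32_t oldPc = counters[pathNum];
  uint32_t inc = (oldPc < 0xffffffffu) ? 1u : 0u;  // saturate, don't wrap
  counters[pathNum] = oldPc + inc;
}
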
+// Creates a counter increment in the given node.  The Value* in node is
+// taken as the index into an array or hash table.  The hash table access
+// is a call to the runtime.
+void PathProfiler::insertCounterIncrement(Value* incValue,
+                                          BasicBlock::iterator insertPoint,
+                                          BLInstrumentationDag* dag,
+                                          bool increment) {
+  // Counter increment for array
+  if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
+    // Get pointer to the array location
+    std::vector<Value*> gepIndices(2);
+    gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+    gepIndices[1] = incValue;
+
+    GetElementPtrInst* pcPointer =
+      GetElementPtrInst::Create(dag->getCounterArray(),
+                                gepIndices.begin(), gepIndices.end(),
+                                "counterInc", insertPoint);
+
+    // Load from the array - call it oldPC
+    LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
+
+    // Test to see whether adding 1 will overflow the counter
+    ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
+                                   createIncrementConstant(0xffffffff, 32),
+                                   "isMax");
+
+    // Select increment for the path counter based on overflow
+    SelectInst* inc =
+      SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
+                          createIncrementConstant(0,32),
+                          "pathInc", insertPoint);
+
+    // newPc = oldPc + inc
+    BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
+                                                   oldPc, inc, "newPC",
+                                                   insertPoint);
+
+    // Store back into the array
+    new StoreInst(newPc, pcPointer, insertPoint);
+  } else { // Counter increment for hash
+    std::vector<Value*> args(2);
+    args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
+                               currentFunctionNumber);
+    args[1] = incValue;
+
+    CallInst::Create(
+      increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
+      args.begin(), args.end(), "", insertPoint);
+  }
+}
+
+// Inserts instrumentation for the given edge
+//
+// Pre: The edge's source node has pathNumber set if the edge has a non-zero
+// path number increment.
+//
+// Post: Edge's target node has a pathNumber set to the path number Value
+// corresponding to the value of the path register after edge's
+// execution.
+//
+// FIXME: This should be reworked so it's not recursive.
+void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
+                                                   BLInstrumentationDag* dag) {
+  // Mark the edge as instrumented
+  edge->setHasInstrumentation(true);
+  DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
+
+  // create a new node for this edge's instrumentation
+  splitCritical(edge, dag);
+
+  BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
+  BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
+  BLInstrumentationNode* instrumentNode;
+  BLInstrumentationNode* nextSourceNode;
+
+  bool atBeginning = false;
+
+  // Source node has only 1 successor so any information can be simply
+  // inserted into it without splitting
+  if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
+    DEBUG(dbgs() << "  Potential instructions to be placed in: "
+          << sourceNode->getName() << " (at end)\n");
+    instrumentNode = sourceNode;
+    nextSourceNode = targetNode; // ... since we never made any new nodes
+  }
+
+  // The target node only has one predecessor, so we can safely insert edge
+  // instrumentation into it.  If there was splitting, it must have been
+  // successful.
+  else if( targetNode->getNumberPredEdges() == 1 ) {
+    DEBUG(dbgs() << "  Potential instructions to be placed in: "
+          << targetNode->getName() << " (at beginning)\n");
+    pushValueIntoNode(sourceNode, targetNode);
+    instrumentNode = targetNode;
+    nextSourceNode = NULL; // ... otherwise we'll just keep splitting
+    atBeginning = true;
+  }
+
+  // Somehow, splitting must have failed.
+  else {
+    errs() << "Instrumenting could not split a critical edge.\n";
+    DEBUG(dbgs() << "  Couldn't split edge " << (*edge) << ".\n");
+    return;
+  }
+
+  // Insert instrumentation if this is a back or split edge
+  if( edge->getType() == BallLarusEdge::BACKEDGE ||
+      edge->getType() == BallLarusEdge::SPLITEDGE ) {
+    BLInstrumentationEdge* top =
+      (BLInstrumentationEdge*) edge->getPhonyRoot();
+    BLInstrumentationEdge* bottom =
+      (BLInstrumentationEdge*) edge->getPhonyExit();
+
+    assert( top->isInitialization() && " Top phony edge did not"
+            " contain a path number initialization.");
+    assert( bottom->isCounterIncrement() && " Bottom phony edge"
+            " did not contain a path counter increment.");
+
+    // The split edge has not been initialized yet
+    if( !instrumentNode->getEndingPathNumber() ) {
+      instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
+      instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
+    }
+
+    BasicBlock::iterator insertPoint = atBeginning ?
+      instrumentNode->getBlock()->getFirstNonPHI() :
+      instrumentNode->getBlock()->getTerminator();
+
+    // add information from the bottom edge, if it exists
+    if( bottom->getIncrement() ) {
+      Value* newpn =
+        BinaryOperator::Create(Instruction::Add,
+                               instrumentNode->getStartingPathNumber(),
+                               createIncrementConstant(bottom),
+                               "pathNumber", insertPoint);
+      instrumentNode->setEndingPathNumber(newpn);
+    }
+
+    insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+                           insertPoint, dag);
+
+    if( atBeginning )
+      instrumentNode->setStartingPathNumber(createIncrementConstant(top));
+
+    instrumentNode->setEndingPathNumber(createIncrementConstant(top));
+
+    // Check for path counter increments
+    if( top->isCounterIncrement() ) {
+      insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+                             instrumentNode->getBlock()->getTerminator(),dag);
+      instrumentNode->setEndingPathNumber(0);
+    }
+  }
+
+  // Insert instrumentation if this is a normal edge
+  else {
+    BasicBlock::iterator insertPoint = atBeginning ?
+      instrumentNode->getBlock()->getFirstNonPHI() :
+      instrumentNode->getBlock()->getTerminator();
+
+    if( edge->isInitialization() ) { // initialize path number
+      instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
+    } else if( edge->getIncrement() ) {// increment path number
+      Value* newpn =
+        BinaryOperator::Create(Instruction::Add,
+                               instrumentNode->getStartingPathNumber(),
+                               createIncrementConstant(edge),
+                               "pathNumber", insertPoint);
+      instrumentNode->setEndingPathNumber(newpn);
+
+      if( atBeginning )
+        instrumentNode->setStartingPathNumber(newpn);
+    }
+
+    // Check for path counter increments
+    if( edge->isCounterIncrement() ) {
+      insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+                             insertPoint, dag);
+      instrumentNode->setEndingPathNumber(0);
+    }
+  }
+
+  // Push it along
+  if (nextSourceNode && instrumentNode->getEndingPathNumber())
+    pushValueIntoNode(instrumentNode, nextSourceNode);
+
+  // Add all the successors
+  for( BLEdgeIterator next = targetNode->succBegin(),
+       end = targetNode->succEnd(); next != end; next++ ) {
+    // So long as it is uninstrumented, add it to the list
+    if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
+      insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
+    else
+      DEBUG(dbgs() << "  Edge " << *(BLInstrumentationEdge*)(*next)
+            << " already instrumented.\n");
+  }
+}
+
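// One non-recursive shape for the FIXME above (a sketch, not part of the
// patch): drive the traversal with an explicit worklist so a deep DAG
// cannot overflow the call stack.
//
//   std::vector<BLInstrumentationEdge*> worklist(1, exitRootEdge);
//   while (!worklist.empty()) {
//     BLInstrumentationEdge* edge = worklist.back();
//     worklist.pop_back();
//     if (edge->hasInstrumentation())
//       continue;
//     // ... instrument 'edge' exactly as the body above does ...
//     for (BLEdgeIterator next = edge->getTarget()->succBegin(),
//          end = edge->getTarget()->succEnd(); next != end; next++)
//       worklist.push_back((BLInstrumentationEdge*)*next);
//   }
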
+// Inserts instrumentation according to the marked edges in dag.  Phony edges
+// must be unlinked from the DAG, but accessible from the backedges.  Dag
+// must have initializations, path number increments, and counter increments
+// present.
+//
+// Counter storage is created here.
+void PathProfiler::insertInstrumentation(
+  BLInstrumentationDag& dag, Module &M) {
+
+  BLInstrumentationEdge* exitRootEdge =
+    (BLInstrumentationEdge*) dag.getExitRootEdge();
+  insertInstrumentationStartingAt(exitRootEdge, &dag);
+
+  // Iterate through each call edge and apply the appropriate hash increment
+  // and decrement functions
+  BLEdgeVector callEdges = dag.getCallPhonyEdges();
+  for( BLEdgeIterator edge = callEdges.begin(),
+       end = callEdges.end(); edge != end; edge++ ) {
+    BLInstrumentationNode* node =
+      (BLInstrumentationNode*)(*edge)->getSource();
+    BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI();
+
+    // Find the first function call
+    while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
+      insertPoint++;
+
+    DEBUG(dbgs() << "\nInstrumenting method call block '"
+          << node->getBlock()->getNameStr() << "'\n");
+    DEBUG(dbgs() << "   Path number initialized: "
+          << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+
+    Value* newpn;
+    if( node->getStartingPathNumber() ) {
+      long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+      if ( inc )
+        newpn = BinaryOperator::Create(Instruction::Add,
+                                       node->getStartingPathNumber(),
+                                       createIncrementConstant(inc,32),
+                                       "pathNumber", insertPoint);
+      else
+        newpn = node->getStartingPathNumber();
+    } else {
+      newpn = (Value*)createIncrementConstant(
+        ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
+    }
+
+    insertCounterIncrement(newpn, insertPoint, &dag);
+    insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
+                           &dag, false);
+  }
+}
+
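// Layout of the per-function record pushed onto ftInit below, as the
// runtime would see it (a sketch, not part of the patch; the struct name
// and field names are illustrative):
//
//   struct FunctionPathTableEntry {
//     uint32_t type;      // ProfilingArray or ProfilingHash
//     uint32_t numPaths;  // number of Ball-Larus paths in the function
//     void*    counters;  // pointer to the i32 counter array, or null
//   };
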
+// Instruments a single function; invoked once per function by runOnModule.
+void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
+                                 Function &F, Module &M) {
+  // Build DAG from CFG
+  BLInstrumentationDag dag = BLInstrumentationDag(F);
+  dag.init();
+
+  // give each path a unique integer value
+  dag.calculatePathNumbers();
+
+  // modify path increments to increase the efficiency
+  // of instrumentation
+  dag.calculateSpanningTree();
+  dag.calculateChordIncrements();
+  dag.pushInitialization();
+  dag.pushCounters();
+  dag.unlinkPhony();
+
+  // potentially generate .dot graph for the dag
+  if (DotPathDag)
+    dag.generateDotGraph ();
+
+  // Should we store the information in an array or hash
+  if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
+    const Type* t = ArrayType::get(Type::getInt32Ty(*Context),
+                                   dag.getNumberOfPaths());
+
+    dag.setCounterArray(new GlobalVariable(M, t, false,
+                                           GlobalValue::InternalLinkage,
+                                           Constant::getNullValue(t), ""));
+  }
+
+  insertInstrumentation(dag, M);
+
+  // Add to global function reference table
+  unsigned type;
+  const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
+
+  if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
+    type = ProfilingArray;
+  else
+    type = ProfilingHash;
+
+  std::vector<Constant*> entryArray(3);
+  entryArray[0] = createIncrementConstant(type,32);
+  entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
+  entryArray[2] = dag.getCounterArray() ?
+    ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
+    Constant::getNullValue(voidPtr);
+
+  const StructType* at = ftEntryTypeBuilder::get(*Context);
+  ConstantStruct* functionEntry =
+    (ConstantStruct*)ConstantStruct::get(at, entryArray);
+  ftInit.push_back(functionEntry);
+}
+
+// Output the bitcode if we want to observe instrumentation changes
+#define PRINT_MODULE dbgs() <<                                \
+  "\n\n============= MODULE BEGIN ===============\n" << M <<  \
+  "\n============== MODULE END ================\n"
+
+bool PathProfiler::runOnModule(Module &M) {
+  Context = &M.getContext();
+
+  DEBUG(dbgs()
+        << "****************************************\n"
+        << "****************************************\n"
+        << "**                                    **\n"
+        << "**   PATH PROFILING INSTRUMENTATION   **\n"
+        << "**                                    **\n"
+        << "****************************************\n"
+        << "****************************************\n");
+
+  // No main, no instrumentation!
+  Function *Main = M.getFunction("main");
+
+  // Using Fortran?  ... this kind of works
+  if (!Main)
+    Main = M.getFunction("MAIN__");
+
+  if (!Main) {
+    errs() << "WARNING: cannot insert path profiling into a module"
+           << " with no main function!\n";
+    return false;
+  }
+
+  BasicBlock::iterator insertPoint = Main->getEntryBlock().getFirstNonPHI();
+
+  llvmIncrementHashFunction = M.getOrInsertFunction(
+    "llvm_increment_path_count",
+    Type::getVoidTy(*Context), // return type
+    Type::getInt32Ty(*Context), // function number
+    Type::getInt32Ty(*Context), // path number
+    NULL );
+
+  llvmDecrementHashFunction = M.getOrInsertFunction(
+    "llvm_decrement_path_count",
+    Type::getVoidTy(*Context), // return type
+    Type::getInt32Ty(*Context), // function number
+    Type::getInt32Ty(*Context), // path number
+    NULL );
+
+  std::vector<Constant*> ftInit;
+  unsigned functionNumber = 0;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+    if (F->isDeclaration())
+      continue;
+
+    DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n");
+    functionNumber++;
+
+    // set function number
+    currentFunctionNumber = functionNumber;
+    runOnFunction(ftInit, *F, M);
+  }
+
+  const Type *t = ftEntryTypeBuilder::get(*Context);
+  const ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
+  Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
+
+  DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
+
+  GlobalVariable* functionTable =
+    new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
+                       ftInitConstant, "functionPathTable");
+  const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
+  InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
+                          PointerType::getUnqual(eltType));
+
+  DEBUG(PRINT_MODULE);
+
+  return true;
+}
+
+// If this edge is a critical edge, inserts a new node on the edge.  The
+// original edge becomes the first edge, and a new BallLarusEdge is created.
+// Returns true if the edge was split +bool PathProfiler::splitCritical(BLInstrumentationEdge* edge, + BLInstrumentationDag* dag) { + unsigned succNum = edge->getSuccessorNumber(); + BallLarusNode* sourceNode = edge->getSource(); + BallLarusNode* targetNode = edge->getTarget(); + BasicBlock* sourceBlock = sourceNode->getBlock(); + BasicBlock* targetBlock = targetNode->getBlock(); + + if(sourceBlock == NULL || targetBlock == NULL + || sourceNode->getNumberSuccEdges() <= 1 + || targetNode->getNumberPredEdges() == 1 ) { + return(false); + } + + TerminatorInst* terminator = sourceBlock->getTerminator(); + + if( SplitCriticalEdge(terminator, succNum, this, false)) { + BasicBlock* newBlock = terminator->getSuccessor(succNum); + dag->splitUpdate(edge, newBlock); + return(true); + } else + return(false); +} diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 1a30e9b..b57bbf6 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -22,12 +22,13 @@ #include "llvm/Module.h" void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, - GlobalValue *Array) { + GlobalValue *Array, + PointerType *arrayType) { LLVMContext &Context = MainFn->getContext(); - const Type *ArgVTy = + const Type *ArgVTy = PointerType::getUnqual(Type::getInt8PtrTy(Context)); - const PointerType *UIntPtr = - Type::getInt32PtrTy(Context); + const PointerType *UIntPtr = arrayType ? arrayType : + Type::getInt32PtrTy(Context); Module &M = *MainFn->getParent(); Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context), Type::getInt32Ty(Context), @@ -71,9 +72,9 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, case 2: AI = MainFn->arg_begin(); ++AI; if (AI->getType() != ArgVTy) { - Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, + Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, false); - InitCall->setArgOperand(1, + InitCall->setArgOperand(1, CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall)); } else { InitCall->setArgOperand(1, AI); @@ -93,7 +94,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, } opcode = CastInst::getCastOpcode(AI, true, Type::getInt32Ty(Context), true); - InitCall->setArgOperand(0, + InitCall->setArgOperand(0, CastInst::Create(opcode, AI, Type::getInt32Ty(Context), "argc.cast", InitCall)); } else { @@ -106,9 +107,10 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, } void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray) { + GlobalValue *CounterArray, bool beginning) { // Insert the increment after any alloca or PHI instructions... - BasicBlock::iterator InsertPos = BB->getFirstNonPHI(); + BasicBlock::iterator InsertPos = beginning ? 
BB->getFirstNonPHI() : + BB->getTerminator(); while (isa<AllocaInst>(InsertPos)) ++InsertPos; @@ -118,7 +120,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, std::vector<Constant*> Indices(2); Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); - Constant *ElementPtr = + Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h index 94efffe..a76e357 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h +++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h @@ -21,11 +21,14 @@ namespace llvm { class Function; class GlobalValue; class BasicBlock; + class PointerType; void InsertProfilingInitCall(Function *MainFn, const char *FnName, - GlobalValue *Arr = 0); + GlobalValue *Arr = 0, + PointerType *arrayType = 0); void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray); + GlobalValue *CounterArray, + bool beginning = true); } #endif diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp index ada086e..a5adb5e 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -33,7 +33,9 @@ STATISTIC(NumRemoved, "Number of instructions removed"); namespace { struct ADCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ADCE() : FunctionPass(ID) {} + ADCE() : FunctionPass(ID) { + initializeADCEPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function& F); @@ -45,7 +47,7 @@ namespace { } char ADCE::ID = 0; -INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false); +INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false) bool ADCE::runOnFunction(Function& F) { SmallPtrSet<Instruction*, 128> alive; diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp index b144678..cee5502 100644 --- a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -41,7 +41,9 @@ STATISTIC(NumMoved, "Number of basic blocks moved"); namespace { struct BlockPlacement : public FunctionPass { static char ID; // Pass identification, replacement for typeid - BlockPlacement() : FunctionPass(ID) {} + BlockPlacement() : FunctionPass(ID) { + initializeBlockPlacementPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); @@ -74,8 +76,11 @@ namespace { } char BlockPlacement::ID = 0; -INITIALIZE_PASS(BlockPlacement, "block-placement", - "Profile Guided Basic Block Placement", false, false); +INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement", + "Profile Guided Basic Block Placement", false, false) +INITIALIZE_AG_DEPENDENCY(ProfileInfo) +INITIALIZE_PASS_END(BlockPlacement, "block-placement", + "Profile Guided Basic Block Placement", false, false) FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index e07b761..9536939 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -22,6 +22,8 
@@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -31,6 +33,7 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" @@ -39,31 +42,59 @@ #include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/ValueHandle.h" using namespace llvm; using namespace llvm::PatternMatch; +STATISTIC(NumBlocksElim, "Number of blocks eliminated"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " + "sunken Cmps"); +STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " + "of sunken Casts"); +STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " + "computations were sunk"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); + static cl::opt<bool> CriticalEdgeSplit("cgp-critical-edge-splitting", cl::desc("Split critical edges during codegen prepare"), - cl::init(true), cl::Hidden); + cl::init(false), cl::Hidden); namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; + DominatorTree *DT; ProfileInfo *PFI; + + /// CurInstIterator - As we scan instructions optimizing them, this is the + /// next instruction to optimize. Xforms that can invalidate this should + /// update it. + BasicBlock::iterator CurInstIterator; /// BackEdges - Keep a set of all the loop back edges. /// SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges; + + // Keeps track of non-local addresses that have been sunk into a block. This + // allows us to avoid inserting duplicate code for blocks with multiple + // load/stores of the same address. 
+ DenseMap<Value*, Value*> SunkAddrs; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) - : FunctionPass(ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) { + initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); } @@ -76,10 +107,9 @@ namespace { bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); bool OptimizeBlock(BasicBlock &BB); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy, - DenseMap<Value*,Value*> &SunkAddrs); - bool OptimizeInlineAsmInst(Instruction *I, CallSite CS, - DenseMap<Value*,Value*> &SunkAddrs); + bool OptimizeInst(Instruction *I); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy); + bool OptimizeInlineAsmInst(CallInst *CS); bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); @@ -89,7 +119,7 @@ namespace { char CodeGenPrepare::ID = 0; INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", - "Optimize for code generation", false, false); + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); @@ -108,13 +138,16 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) { bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); - // Now find loop back edges. - findLoopBackEdges(F); + // Now find loop back edges, but only if they are being used to decide which + // critical edges to split. + if (CriticalEdgeSplit) + findLoopBackEdges(F); bool MadeChange = true; while (MadeChange) { @@ -123,6 +156,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { MadeChange |= OptimizeBlock(*BB); EverMadeChange |= MadeChange; } + + SunkAddrs.clear(); + return EverMadeChange; } @@ -297,11 +333,19 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); + if (DT) { + BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); + BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); + BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); + DT->changeImmediateDominator(DestBB, NewIDom); + DT->eraseNode(BB); + } if (PFI) { PFI->replaceAllUses(BB, DestBB); PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); + ++NumBlocksElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } @@ -480,6 +524,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // Replace a use of the cast with a use of the new cast. TheUse = InsertedCast; + ++NumCastUses; } // If we removed all uses, nuke the cast. @@ -537,6 +582,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) { // Replace a use of the cmp with a use of the new cmp. TheUse = InsertedCmp; + ++NumCmpUses; } // If we removed all uses, nuke the cmp. 
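(Aside, a sketch rather than part of the patch: the PHI handling added to
OptimizeMemoryInst in the hunks below only sinks an address when every
non-PHI root of the use-def graph matches one addressing mode.  For example,
if PRE duplicated the same computation into both predecessors:

    bb1:   %a = getelementptr i32* %p, i32 4
    bb2:   %b = getelementptr i32* %p, i32 4
    merge: %m = phi i32* [ %a, %bb1 ], [ %b, %bb2 ]
           %v = load i32* %m

both roots yield the same ExtAddrMode (base register %p plus a constant
offset), so a Consensus root survives and the address can be sunk next to
the load; had the two GEPs computed different shapes, Consensus would be
cleared and the transformation would bail.)
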
@@ -563,14 +609,45 @@ protected:
 } // end anonymous namespace
 
 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+  BasicBlock *BB = CI->getParent();
+
+  // Lower inline assembly if we can.
+  // If we found an inline asm expression, and if the target knows how to
+  // lower it to normal LLVM code, do so now.
+  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+    if (TLI->ExpandInlineAsm(CI)) {
+      // Avoid invalidating the iterator.
+      CurInstIterator = BB->begin();
+      // Avoid processing instructions out of order, which could cause
+      // reuse before a value is defined.
+      SunkAddrs.clear();
+      return true;
+    }
+    // Sink address computing for memory operands into the block.
+    if (OptimizeInlineAsmInst(CI))
+      return true;
+  }
+
   // Lower all uses of llvm.objectsize.*
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
     bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
     const Type *ReturnTy = CI->getType();
     Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-    CI->replaceAllUsesWith(RetVal);
-    CI->eraseFromParent();
+
+    // Substituting this can cause recursive simplifications, which can
+    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
+    // happens.
+    WeakVH IterHandle(CurInstIterator);
+
+    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
+
+    // If the iterator instruction was recursively deleted, start over at the
+    // start of the block.
+    if (IterHandle != CurInstIterator) {
+      CurInstIterator = BB->begin();
+      SunkAddrs.clear();
+    }
     return true;
   }
@@ -588,6 +665,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
   CodeGenPrepareFortifiedLibCalls Simplifier;
   return Simplifier.fold(CI, TD);
 }
+
 //===----------------------------------------------------------------------===//
 // Memory Optimization
 //===----------------------------------------------------------------------===//
@@ -610,13 +688,69 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
 /// This method is used to optimize both load/store and inline asms with memory
 /// operands.
 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
-                                        const Type *AccessTy,
-                                        DenseMap<Value*,Value*> &SunkAddrs) {
-  // Figure out what addressing mode will be built up for this operation.
+                                        const Type *AccessTy) {
+  Value *Repl = Addr;
+
+  // Try to collapse single-value PHI nodes.  This is necessary to undo
+  // unprofitable PRE transformations.
+  SmallVector<Value*, 8> worklist;
+  SmallPtrSet<Value*, 16> Visited;
+  worklist.push_back(Addr);
+
+  // Use a worklist to iteratively look through PHI nodes, and ensure that
+  // the addressing modes obtained from the non-PHI roots of the graph
+  // are equivalent.
+  Value *Consensus = 0;
+  unsigned NumUses = 0;
   SmallVector<Instruction*, 16> AddrModeInsts;
-  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
-                                                      AddrModeInsts, *TLI);
-
+  ExtAddrMode AddrMode;
+  while (!worklist.empty()) {
+    Value *V = worklist.back();
+    worklist.pop_back();
+
+    // Break use-def graph loops.
+    if (Visited.count(V)) {
+      Consensus = 0;
+      break;
+    }
+
+    Visited.insert(V);
+
+    // For a PHI node, push all of its incoming values.
+    if (PHINode *P = dyn_cast<PHINode>(V)) {
+      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+        worklist.push_back(P->getIncomingValue(i));
+      continue;
+    }
+
+    // For non-PHIs, determine the addressing mode being computed.
+ SmallVector<Instruction*, 16> NewAddrModeInsts; + ExtAddrMode NewAddrMode = + AddressingModeMatcher::Match(V, AccessTy,MemoryInst, + NewAddrModeInsts, *TLI); + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher applicable. + if (!Consensus || NewAddrMode == AddrMode) { + if (V->getNumUses() > NumUses) { + Consensus = V; + NumUses = V->getNumUses(); + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + } + continue; + } + + Consensus = 0; + break; + } + + // If the addressing mode couldn't be determined, or if multiple different + // ones were determined, bail out now. + if (!Consensus) return false; + // Check to see if any of the instructions supersumed by this addr mode are // non-local to I's BB. bool AnyNonLocal = false; @@ -719,60 +853,39 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt); } - MemoryInst->replaceUsesOfWith(Addr, SunkAddr); + MemoryInst->replaceUsesOfWith(Repl, SunkAddr); - if (Addr->use_empty()) { - RecursivelyDeleteTriviallyDeadInstructions(Addr); + if (Repl->use_empty()) { + RecursivelyDeleteTriviallyDeadInstructions(Repl); // This address is now available for reassignment, so erase the table entry; // we don't want to match some completely different instruction. SunkAddrs[Addr] = 0; } + ++NumMemoryInsts; return true; } /// OptimizeInlineAsmInst - If there are any memory operands, use /// OptimizeMemoryInst to sink their address computing into the block when /// possible / profitable. -bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS, - DenseMap<Value*,Value*> &SunkAddrs) { +bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; - InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); - - // Do a prepass over the constraints, canonicalizing them, and building up the - // ConstraintOperands list. - std::vector<InlineAsm::ConstraintInfo> - ConstraintInfos = IA->ParseConstraints(); - - /// ConstraintOperands - Information about all of the constraints. - std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands; - unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands. - push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i])); - TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back(); - - // Compute the value type for each operand. - switch (OpInfo.Type) { - case InlineAsm::isOutput: - if (OpInfo.isIndirect) - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); - break; - case InlineAsm::isInput: - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); - break; - case InlineAsm::isClobber: - // Nothing to do. - break; - } + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI->ParseConstraints(CS); + unsigned ArgNo = 0; + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + // Compute the constraint code and ConstraintType to use. 
TLI->ComputeConstraintToUse(OpInfo, SDValue());
 
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.isIndirect) {
-      Value *OpVal = OpInfo.CallOperandVal;
-      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
-    }
+      Value *OpVal = CS->getArgOperand(ArgNo++);
+      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+    } else if (OpInfo.Type == InlineAsm::isInput)
+      ArgNo++;
   }
 
   return MadeChange;
@@ -794,7 +907,9 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
   // If the load has other users and the truncate is not free, this probably
   // isn't worthwhile.
   if (!LI->hasOneUse() &&
-      TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
+      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+      !TLI->isTruncateFree(I->getType(), LI->getType()))
     return false;
 
   // Check whether the target supports casts folded into loads.
@@ -812,13 +927,14 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
   // can fold it.
   I->removeFromParent();
   I->insertAfter(LI);
+  ++NumExtsMoved;
   return true;
 }
 
 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   BasicBlock *DefBB = I->getParent();
 
-  // If both result of the {s|z}xt and its source are live out, rewrite all
+  // If the result of a {s|z}ext and its source are both live out, rewrite all
   // other uses of the source with result of extension.
   Value *Src = I->getOperand(0);
   if (Src->hasOneUse())
@@ -883,13 +999,83 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
     // Replace a use of the {s|z}ext source with a use of the result.
     TheUse = InsertedTrunc;
-
+    ++NumExtUses;
     MadeChange = true;
   }
 
   return MadeChange;
 }
 
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+  if (PHINode *P = dyn_cast<PHINode>(I)) {
+    // It is possible for very late stage optimizations (such as SimplifyCFG)
+    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
+    // trivial PHI, go ahead and zap it here.
+    if (Value *V = SimplifyInstruction(P)) {
+      P->replaceAllUsesWith(V);
+      P->eraseFromParent();
+      ++NumPHIsElim;
+      return true;
+    }
+    return false;
+  }
+
+  if (CastInst *CI = dyn_cast<CastInst>(I)) {
+    // If the source of the cast is a constant, then this should have
+    // already been constant folded.  The only reason NOT to constant fold
+    // it is if something (e.g. LSR) was careful to place the constant
+    // evaluation in a block other than the one that uses it (e.g. to hoist
+    // the address of globals out of a loop).  If this is the case, we don't
+    // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0))) + return false; + + if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) + return true; + + if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { + bool MadeChange = MoveExtToFormExtLoad(I); + return MadeChange | OptimizeExtUses(I); + } + return false; + } + + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + return OptimizeCmpExpression(CI); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (TLI) + return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); + return false; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (TLI) + return OptimizeMemoryInst(I, SI->getOperand(1), + SI->getOperand(0)->getType()); + return false; + } + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { + if (GEPI->hasAllZeroIndices()) { + /// The GEP operand must be a pointer, so must its result -> BitCast + Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NC); + GEPI->eraseFromParent(); + ++NumGEPsElim; + OptimizeInst(NC); + return true; + } + return false; + } + + if (CallInst *CI = dyn_cast<CallInst>(I)) + return OptimizeCallInst(CI); + + return false; +} + // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. @@ -908,74 +1094,11 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { } } - // Keep track of non-local addresses that have been sunk into this block. - // This allows us to avoid inserting duplicate code for blocks with multiple - // load/stores of the same address. - DenseMap<Value*, Value*> SunkAddrs; - - for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) { - Instruction *I = BBI++; + SunkAddrs.clear(); - if (CastInst *CI = dyn_cast<CastInst>(I)) { - // If the source of the cast is a constant, then this should have - // already been constant folded. The only reason NOT to constant fold - // it is if something (e.g. LSR) was careful to place the constant - // evaluation in a block other than then one that uses it (e.g. to hoist - // the address of globals out of a loop). If this is the case, we don't - // want to forward-subst the cast. - if (isa<Constant>(CI->getOperand(0))) - continue; - - bool Change = false; - if (TLI) { - Change = OptimizeNoopCopyExpression(CI, *TLI); - MadeChange |= Change; - } - - if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) { - MadeChange |= MoveExtToFormExtLoad(I); - MadeChange |= OptimizeExtUses(I); - } - } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { - MadeChange |= OptimizeCmpExpression(CI); - } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (TLI) - MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), - SunkAddrs); - } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (TLI) - MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1), - SI->getOperand(0)->getType(), - SunkAddrs); - } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { - if (GEPI->hasAllZeroIndices()) { - /// The GEP operand must be a pointer, so must its result -> BitCast - Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), - GEPI->getName(), GEPI); - GEPI->replaceAllUsesWith(NC); - GEPI->eraseFromParent(); - MadeChange = true; - BBI = NC; - } - } else if (CallInst *CI = dyn_cast<CallInst>(I)) { - // If we found an inline asm expession, and if the target knows how to - // lower it to normal LLVM code, do so now. 
- if (TLI && isa<InlineAsm>(CI->getCalledValue())) { - if (TLI->ExpandInlineAsm(CI)) { - BBI = BB.begin(); - // Avoid processing instructions out of order, which could cause - // reuse before a value is defined. - SunkAddrs.clear(); - } else - // Sink address computing for memory operands into the block. - MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs); - } else { - // Other CallInst optimizations that don't need to muck with the - // enclosing iterator here. - MadeChange |= OptimizeCallInst(CI); - } - } - } + CurInstIterator = BB.begin(); + for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; ) + MadeChange |= OptimizeInst(CurInstIterator++); return MadeChange; } diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp index a0ea369..664c3f6 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp @@ -34,7 +34,9 @@ STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { struct ConstantPropagation : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ConstantPropagation() : FunctionPass(ID) {} + ConstantPropagation() : FunctionPass(ID) { + initializeConstantPropagationPass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &F); @@ -46,7 +48,7 @@ namespace { char ConstantPropagation::ID = 0; INITIALIZE_PASS(ConstantPropagation, "constprop", - "Simple constant propagation", false, false); + "Simple constant propagation", false, false) FunctionPass *llvm::createConstantPropagationPass() { return new ConstantPropagation(); diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 0d4e45d..be12973 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -16,6 +16,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" @@ -30,18 +31,20 @@ STATISTIC(NumCmps, "Number of comparisons propagated"); namespace { class CorrelatedValuePropagation : public FunctionPass { LazyValueInfo *LVI; - + bool processSelect(SelectInst *SI); bool processPHI(PHINode *P); bool processMemAccess(Instruction *I); bool processCmp(CmpInst *C); - + public: static char ID; - CorrelatedValuePropagation(): FunctionPass(ID) { } - + CorrelatedValuePropagation(): FunctionPass(ID) { + initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F); - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LazyValueInfo>(); } @@ -49,8 +52,11 @@ namespace { } char CorrelatedValuePropagation::ID = 0; -INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation", - "Value Propagation", false, false); +INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation", + "Value Propagation", false, false) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfo) +INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation", + "Value Propagation", false, false) // Public interface to the Value Propagation pass Pass *llvm::createCorrelatedValuePropagationPass() { @@ -60,46 +66,51 @@ Pass *llvm::createCorrelatedValuePropagationPass() { bool 
CorrelatedValuePropagation::processSelect(SelectInst *S) { if (S->getType()->isVectorTy()) return false; if (isa<Constant>(S->getOperand(0))) return false; - + Constant *C = LVI->getConstant(S->getOperand(0), S->getParent()); if (!C) return false; - + ConstantInt *CI = dyn_cast<ConstantInt>(C); if (!CI) return false; - - S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2)); + + Value *ReplaceWith = S->getOperand(1); + Value *Other = S->getOperand(2); + if (!CI->isOne()) std::swap(ReplaceWith, Other); + if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType()); + + S->replaceAllUsesWith(ReplaceWith); S->eraseFromParent(); ++NumSelects; - + return true; } bool CorrelatedValuePropagation::processPHI(PHINode *P) { bool Changed = false; - + BasicBlock *BB = P->getParent(); for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { Value *Incoming = P->getIncomingValue(i); if (isa<Constant>(Incoming)) continue; - + Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), P->getIncomingBlock(i), BB); if (!C) continue; - + P->setIncomingValue(i, C); Changed = true; } - - if (Value *ConstVal = P->hasConstantValue()) { - P->replaceAllUsesWith(ConstVal); + + if (Value *V = SimplifyInstruction(P)) { + P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; } - + ++NumPhis; - + return Changed; } @@ -109,12 +120,12 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { Pointer = L->getPointerOperand(); else Pointer = cast<StoreInst>(I)->getPointerOperand(); - + if (isa<Constant>(Pointer)) return false; - + Constant *C = LVI->getConstant(Pointer, I->getParent()); if (!C) return false; - + ++NumMemAccess; I->replaceUsesOfWith(Pointer, C); return true; @@ -130,32 +141,32 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) { if (isa<Instruction>(Op0) && cast<Instruction>(Op0)->getParent() == C->getParent()) return false; - + Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); if (!Op1) return false; - + pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent()); if (PI == PE) return false; - - LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), + + LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), C->getOperand(0), Op1, *PI, C->getParent()); if (Result == LazyValueInfo::Unknown) return false; ++PI; while (PI != PE) { - LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), + LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), C->getOperand(0), Op1, *PI, C->getParent()); if (Res != Result) return false; ++PI; } - + ++NumCmps; - + if (Result == LazyValueInfo::True) C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext())); else C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext())); - + C->eraseFromParent(); return true; @@ -163,9 +174,9 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) { bool CorrelatedValuePropagation::runOnFunction(Function &F) { LVI = &getAnalysis<LazyValueInfo>(); - + bool FnChanged = false; - + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { bool BBChanged = false; for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { @@ -187,14 +198,9 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { break; } } - - // Propagating correlated values might leave cruft around. - // Try to clean it up before we continue. 
- if (BBChanged) - SimplifyInstructionsInBlock(FI); - + FnChanged |= BBChanged; } - + return FnChanged; } diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp index 87ea803..dbb68f3 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp @@ -35,7 +35,9 @@ namespace { // struct DeadInstElimination : public BasicBlockPass { static char ID; // Pass identification, replacement for typeid - DeadInstElimination() : BasicBlockPass(ID) {} + DeadInstElimination() : BasicBlockPass(ID) { + initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnBasicBlock(BasicBlock &BB) { bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { @@ -57,7 +59,7 @@ namespace { char DeadInstElimination::ID = 0; INITIALIZE_PASS(DeadInstElimination, "die", - "Dead Instruction Elimination", false, false); + "Dead Instruction Elimination", false, false) Pass *llvm::createDeadInstEliminationPass() { return new DeadInstElimination(); @@ -70,7 +72,9 @@ namespace { // struct DCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - DCE() : FunctionPass(ID) {} + DCE() : FunctionPass(ID) { + initializeDCEPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); @@ -81,7 +85,7 @@ namespace { } char DCE::ID = 0; -INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false); +INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) bool DCE::runOnFunction(Function &F) { // Start out with all of the instructions in the worklist... diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index c8fd9d9..867a06a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -19,17 +19,20 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumFastStores, "Number of stores deleted"); @@ -37,58 +40,107 @@ STATISTIC(NumFastOther , "Number of other instrs removed"); namespace { struct DSE : public FunctionPass { - TargetData *TD; + AliasAnalysis *AA; + MemoryDependenceAnalysis *MD; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(ID) {} + DSE() : FunctionPass(ID), AA(0), MD(0) { + initializeDSEPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F) { - bool Changed = false; - + AA = &getAnalysis<AliasAnalysis>(); + MD = &getAnalysis<MemoryDependenceAnalysis>(); DominatorTree &DT = getAnalysis<DominatorTree>(); + bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) // Only check non-dead blocks. Dead blocks may have strange pointer // cycles that will confuse alias analysis. 
if (DT.isReachableFromEntry(I)) Changed |= runOnBasicBlock(*I); + + AA = 0; MD = 0; return Changed; } bool runOnBasicBlock(BasicBlock &BB); - bool handleFreeWithNonTrivialDependency(const CallInst *F, - MemDepResult Dep); + bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); - bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize, - BasicBlock::iterator &BBI, - SmallPtrSet<Value*, 64> &deadPointers); - void DeleteDeadInstruction(Instruction *I, - SmallPtrSet<Value*, 64> *deadPointers = 0); - + void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, + SmallPtrSet<Value*, 16> &DeadStackObjects); - // getAnalysisUsage - We require post dominance frontiers (aka Control - // Dependence Graph) virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<DominatorTree>(); AU.addRequired<AliasAnalysis>(); AU.addRequired<MemoryDependenceAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<DominatorTree>(); AU.addPreserved<MemoryDependenceAnalysis>(); } - - unsigned getPointerSize(Value *V) const; }; } char DSE::ID = 0; -INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false); +INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false) FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } -/// doesClobberMemory - Does this instruction clobber (write without reading) -/// some memory? -static bool doesClobberMemory(Instruction *I) { +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + +/// DeleteDeadInstruction - Delete this instruction. Before we do, go through +/// and zero out all the operands of this instruction. If any of them become +/// dead, delete them and the computation tree that feeds them. +/// +/// If ValueSet is non-null, remove any deleted instructions from it as well. +/// +static void DeleteDeadInstruction(Instruction *I, + MemoryDependenceAnalysis &MD, + SmallPtrSet<Value*, 16> *ValueSet = 0) { + SmallVector<Instruction*, 32> NowDeadInsts; + + NowDeadInsts.push_back(I); + --NumFastOther; + + // Before we touch this instruction, remove it from memdep! + do { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + ++NumFastOther; + + // This instruction is dead, zap it, in stages. Start by removing it from + // MemDep, which needs to know the operands and needs it to be in the + // function. + MD.removeInstruction(DeadInst); + + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { + Value *Op = DeadInst->getOperand(op); + DeadInst->setOperand(op, 0); + + // If this operand just became dead, add it to the NowDeadInsts list. + if (!Op->use_empty()) continue; + + if (Instruction *OpI = dyn_cast<Instruction>(Op)) + if (isInstructionTriviallyDead(OpI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + + if (ValueSet) ValueSet->erase(DeadInst); + } while (!NowDeadInsts.empty()); +} + + +/// hasMemoryWrite - Does this instruction write some memory? This only returns +/// true for things that we can analyze with other helpers below. 
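/// (Per the helpers below, the recognized writers are plain stores plus the
/// memset/memmove/memcpy, init_trampoline, and lifetime_end intrinsics;
/// anything else conservatively returns false.)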
+static bool hasMemoryWrite(Instruction *I) { if (isa<StoreInst>(I)) return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { @@ -106,146 +158,296 @@ static bool doesClobberMemory(Instruction *I) { return false; } -/// isElidable - If the value of this instruction and the memory it writes to is -/// unused, may we delete this instruction? -static bool isElidable(Instruction *I) { - assert(doesClobberMemory(I)); - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) - return II->getIntrinsicID() != Intrinsic::lifetime_end; +/// getLocForWrite - Return a Location stored to by the specified instruction. +static AliasAnalysis::Location +getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + return AA.getLocation(SI); + + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { + // memcpy/memmove/memset. + AliasAnalysis::Location Loc = AA.getLocationForDest(MI); + // If we don't have target data around, an unknown size in Location means + // that we should use the size of the pointee type. This isn't valid for + // memset/memcpy, which writes more than an i8. + if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0) + return AliasAnalysis::Location(); + return Loc; + } + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); + if (II == 0) return AliasAnalysis::Location(); + + switch (II->getIntrinsicID()) { + default: return AliasAnalysis::Location(); // Unhandled intrinsic. + case Intrinsic::init_trampoline: + // If we don't have target data around, an unknown size in Location means + // that we should use the size of the pointee type. This isn't valid for + // init.trampoline, which writes more than an i8. + if (AA.getTargetData() == 0) return AliasAnalysis::Location(); + + // FIXME: We don't know the size of the trampoline, so we can't really + // handle it here. + return AliasAnalysis::Location(II->getArgOperand(0)); + case Intrinsic::lifetime_end: { + uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); + return AliasAnalysis::Location(II->getArgOperand(1), Len); + } + } +} + +/// getLocForRead - Return the location read by the specified "hasMemoryWrite" +/// instruction if any. +static AliasAnalysis::Location +getLocForRead(Instruction *Inst, AliasAnalysis &AA) { + assert(hasMemoryWrite(Inst) && "Unknown instruction case"); + + // The only instructions that both read and write are the mem transfer + // instructions (memcpy/memmove). + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) + return AA.getLocationForSource(MTI); + return AliasAnalysis::Location(); +} + + +/// isRemovable - If the value of this instruction and the memory it writes to +/// is unused, may we delete this instruction? +static bool isRemovable(Instruction *I) { + // Don't remove volatile stores. if (StoreInst *SI = dyn_cast<StoreInst>(I)) return !SI->isVolatile(); - return true; + + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate"); + case Intrinsic::lifetime_end: + // Never remove dead lifetime_end's, e.g. because it is followed by a + // free. + return false; + case Intrinsic::init_trampoline: + // Always safe to remove init_trampoline. + return true; + + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + // Don't remove volatile memory intrinsics. + return !cast<MemIntrinsic>(II)->isVolatile(); + } } -/// getPointerOperand - Return the pointer that is being clobbered.
-static Value *getPointerOperand(Instruction *I) { - assert(doesClobberMemory(I)); +/// getStoredPointerOperand - Return the pointer that is being written to. +static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) - return MI->getArgOperand(0); + return MI->getDest(); IntrinsicInst *II = cast<IntrinsicInst>(I); switch (II->getIntrinsicID()) { default: assert(false && "Unexpected intrinsic!"); case Intrinsic::init_trampoline: return II->getArgOperand(0); - case Intrinsic::lifetime_end: - return II->getArgOperand(1); } } -/// getStoreSize - Return the length in bytes of the write by the clobbering -/// instruction. If variable or unknown, returns -1. -static unsigned getStoreSize(Instruction *I, const TargetData *TD) { - assert(doesClobberMemory(I)); - if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!TD) return -1u; - return TD->getTypeStoreSize(SI->getOperand(0)->getType()); +static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) { + const TargetData *TD = AA.getTargetData(); + if (TD == 0) + return AliasAnalysis::UnknownSize; + + if (AllocaInst *A = dyn_cast<AllocaInst>(V)) { + // Get size information for the alloca + if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) + return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); + return AliasAnalysis::UnknownSize; } + + assert(isa<Argument>(V) && "Expected AllocaInst or Argument!"); + const PointerType *PT = cast<PointerType>(V->getType()); + return TD->getTypeAllocSize(PT->getElementType()); +} - Value *Len; - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { - Len = MI->getLength(); - } else { - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: assert(false && "Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return -1u; - case Intrinsic::lifetime_end: - Len = II->getArgOperand(0); - break; +/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is +/// pointing to an object with a pointer size we can trust. +static bool isObjectPointerWithTrustworthySize(const Value *V) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) + return !AI->isArrayAllocation(); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return !GV->mayBeOverridden(); + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + return false; +} + +/// isCompleteOverwrite - Return true if a store to the 'Later' location +/// completely overwrites a store to the 'Earlier' location. +static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, + const AliasAnalysis::Location &Earlier, + AliasAnalysis &AA) { + const Value *P1 = Earlier.Ptr->stripPointerCasts(); + const Value *P2 = Later.Ptr->stripPointerCasts(); + + // If the start pointers are the same, we just have to compare sizes to see if + // the later store was larger than the earlier store. + if (P1 == P2) { + // If we don't know the sizes of either access, then we can't do a + // comparison. + if (Later.Size == AliasAnalysis::UnknownSize || + Earlier.Size == AliasAnalysis::UnknownSize) { + // If we have no TargetData information around, then the size of the store + // is inferrable from the pointee type. If they are the same type, then + // we know that the store is safe. + if (AA.getTargetData() == 0) + return Later.Ptr->getType() == Earlier.Ptr->getType(); + return false; } + + // Make sure that the Later size is >= the Earlier size. 
+ if (Later.Size < Earlier.Size) + return false; + return true; } - if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len)) - if (!LenCI->isAllOnesValue()) - return LenCI->getZExtValue(); - return -1u; + + // Otherwise, we have to have size information, and the later store has to be + // larger than the earlier one. + if (Later.Size == AliasAnalysis::UnknownSize || + Earlier.Size == AliasAnalysis::UnknownSize || + Later.Size <= Earlier.Size || AA.getTargetData() == 0) + return false; + + // Check to see if the later store is to the entire object (either a global, + // an alloca, or a byval argument). If so, then it clearly overwrites any + // other store to the same object. + const TargetData &TD = *AA.getTargetData(); + + const Value *UO1 = GetUnderlyingObject(P1, &TD), + *UO2 = GetUnderlyingObject(P2, &TD); + + // If we can't resolve the same pointers to the same object, then we can't + // analyze them at all. + if (UO1 != UO2) + return false; + + // If the "Later" store is to a recognizable object, get its size. + if (isObjectPointerWithTrustworthySize(UO2)) { + uint64_t ObjectSize = + TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType()); + if (ObjectSize == Later.Size) + return true; + } + + // Okay, we have stores to two completely different pointers. Try to + // decompose the pointer into a "base + constant_offset" form. If the base + // pointers are equal, then we can reason about the two stores. + int64_t Off1 = 0, Off2 = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD); + + // If the base pointers still differ, we have two completely different stores. + if (BP1 != BP2) + return false; + + // Otherwise, we might have a situation like: + // store i16 -> P + 1 Byte + // store i32 -> P + // In this case, we see if the later store completely overlaps all bytes + // stored by the previous store. + if (Off1 < Off2 || // Earlier starts before Later. + Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later. + return false; + // Otherwise, we have complete overlap. + return true; } -/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is -/// greater than or equal to the store in I2. This returns false if we don't -/// know. +/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a +/// memory region into an identical pointer) then it doesn't actually make its +/// input dead in the traditional sense. Consider this case: +/// +/// memcpy(A <- B) +/// memcpy(A <- A) +/// +/// In this case, the second store to A does not make the first store to A dead. +/// The usual situation isn't an explicit A<-A store like this (which can be +/// trivially removed) but a case where two pointers may alias. /// -static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2, - const TargetData *TD) { - const Type *I1Ty = getPointerOperand(I1)->getType(); - const Type *I2Ty = getPointerOperand(I2)->getType(); +/// This function detects when it is unsafe to remove a dependent instruction +/// because the DSE inducing instruction may be a self-read. +static bool isPossibleSelfRead(Instruction *Inst, + const AliasAnalysis::Location &InstStoreLoc, + Instruction *DepWrite, AliasAnalysis &AA) { + // Self reads can only happen for instructions that read memory. Get the + // location read. + AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA); + if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction. 
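// A note on isCompleteOverwrite: once the two stores are known to share a
// base pointer, the containment test reduces to interval arithmetic. A
// minimal standalone sketch (hypothetical helper, not part of this patch):
//
//   static bool laterCoversEarlier(int64_t EarlierOff, uint64_t EarlierSize,
//                                  int64_t LaterOff, uint64_t LaterSize) {
//     // The later store must begin at or before the earlier one and must
//     // end at or after the earlier store's last byte.
//     return LaterOff <= EarlierOff &&
//            EarlierOff + (int64_t)EarlierSize <=
//              LaterOff + (int64_t)LaterSize;
//   }
//
// For the i16/i32 example above: EarlierOff=1, EarlierSize=2, LaterOff=0,
// LaterSize=4, so 0 <= 1 and 1+2 <= 0+4 both hold and the i16 store is dead.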
- // Exactly the same type, must have exactly the same size. - if (I1Ty == I2Ty) return true; + // If the read and written loc obviously don't alias, it isn't a read. + if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; - int I1Size = getStoreSize(I1, TD); - int I2Size = getStoreSize(I2, TD); + // Okay, 'Inst' may copy over itself. However, we can still remove the + // DepWrite instruction if we can prove that it reads from the same location + // as Inst. This handles useful cases like: + // memcpy(A <- B) + // memcpy(A <- B) + // Here we don't know if A/B may alias, but we do know that B/B are must + // aliases, so removing the first memcpy is safe (assuming it writes <= # + // bytes as the second one). + AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA); - return I1Size != -1 && I2Size != -1 && I1Size >= I2Size; + if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) + return false; + + // If DepWrite doesn't read memory or if we can't prove it is a must alias, + // then it can't be considered dead. + return true; } -bool DSE::runOnBasicBlock(BasicBlock &BB) { - MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); - TD = getAnalysisIfAvailable<TargetData>(); +//===----------------------------------------------------------------------===// +// DSE Pass +//===----------------------------------------------------------------------===// + +bool DSE::runOnBasicBlock(BasicBlock &BB) { bool MadeChange = false; // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { Instruction *Inst = BBI++; - // If we find a store or a free, get its memory dependence. - if (!doesClobberMemory(Inst) && !isFreeCall(Inst)) - continue; - - MemDepResult InstDep = MD.getDependency(Inst); - - // Ignore non-local stores. - // FIXME: cross-block DSE would be fun. :) - if (InstDep.isNonLocal()) continue; - - // Handle frees whose dependencies are non-trivial. - if (const CallInst *F = isFreeCall(Inst)) { - MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep); + // Handle 'free' calls specially. + if (CallInst *F = isFreeCall(Inst)) { + MadeChange |= HandleFree(F); continue; } - // If not a definite must-alias dependency, ignore it. - if (!InstDep.isDef()) + // If we find something that writes memory, get its memory dependence. + if (!hasMemoryWrite(Inst)) continue; - - // If this is a store-store dependence, then the previous store is dead so - // long as this store is at least as big as it. - if (doesClobberMemory(InstDep.getInst())) { - Instruction *DepStore = InstDep.getInst(); - if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) && - isElidable(DepStore)) { - // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepStore); - ++NumFastStores; - MadeChange = true; - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. - BBI = Inst; - if (BBI != BB.begin()) - --BBI; - continue; - } - } + MemDepResult InstDep = MD->getDependency(Inst); - if (!isElidable(Inst)) + // Ignore non-local store liveness. + // FIXME: cross-block DSE would be fun. :) + if (InstDep.isNonLocal() || + // Ignore self dependence, which happens in the entry block of the + // function. + InstDep.getInst() == Inst) continue; - + // If we're storing the same value back to a pointer that we just // loaded from, then the store can be removed.
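// For example (hypothetical IR, not from this patch):
//   %v = load i32* %p
//   store i32 %v, i32* %p    ; writes back exactly what was just read
// MemDep reports the load as the store's local dependency, the pointer
// operands are literally identical, and the store is non-volatile, so the
// store (and then the otherwise-unused load) can be deleted.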
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - SI->getOperand(0) == DepLoad) { + SI->getOperand(0) == DepLoad && !SI->isVolatile()) { + DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " + << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); + // DeleteDeadInstruction can delete the current instruction. Save BBI // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI); + DeleteDeadInstruction(SI, *MD); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -258,24 +460,63 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { } } - // If this is a lifetime end marker, we can throw away the store. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) { - if (II->getIntrinsicID() == Intrinsic::lifetime_end) { - // Delete the store and now-dead instructions that feed it. - // DeleteDeadInstruction can delete the current instruction. Save BBI - // in case we need it. - WeakVH NextInst(BBI); - - DeleteDeadInstruction(Inst); + // Figure out what location is being stored to. + AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA); + + // If we didn't get a useful location, fail. + if (Loc.Ptr == 0) + continue; + + while (!InstDep.isNonLocal()) { + // Get the memory clobbered by the instruction we depend on. MemDep will + // skip any instructions that 'Loc' clearly doesn't interact with. If we + // end up depending on a may- or must-aliased load, then we can't optimize + // away the store and we bail out. However, if we depend on something + // that overwrites the memory location we *can* potentially optimize it. + // + // Find out what memory location the dependent instruction stores. + Instruction *DepWrite = InstDep.getInst(); + AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); + // If we didn't get a useful location, bail out. + if (DepLoc.Ptr == 0) + break; + + // If we find a write that is a) removable (i.e., non-volatile), b) + // completely obliterated by the store to 'Loc', and c) one that we know + // 'Inst' doesn't load from, then we can remove it. + if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) && + !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " + << *DepWrite << "\n KILLER: " << *Inst << '\n'); - if (NextInst == 0) // Next instruction deleted. - BBI = BB.begin(); - else if (BBI != BB.begin()) // Revisit this instruction if possible. - --BBI; + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepWrite, *MD); ++NumFastStores; MadeChange = true; - continue; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst; + if (BBI != BB.begin()) + --BBI; + break; } + + // If this is a may-aliased store that is clobbering the store value, we + // can keep searching past it for another must-aliased pointer that stores + // to the same location. For example, in: + // store -> P + // store -> Q + // store -> P + // we can remove the first store to P even though we don't know if P and Q + // alias. + if (DepWrite == &BB.front()) break; + + // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref) + break; + + InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB); } } @@ -287,26 +528,36 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { return MadeChange; } -/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose -/// dependency is a store to a field of that structure. -bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F, - MemDepResult Dep) { - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - - Instruction *Dependency = Dep.getInst(); - if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency)) - return false; +/// HandleFree - Handle frees of entire structures whose dependency is a store +/// to a field of that structure. +bool DSE::HandleFree(CallInst *F) { + MemDepResult Dep = MD->getDependency(F); + do { + if (Dep.isNonLocal()) return false; + + Instruction *Dependency = Dep.getInst(); + if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) + return false; - Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject(); + Value *DepPointer = + GetUnderlyingObject(getStoredPointerOperand(Dependency)); - // Check for aliasing. - if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) != - AliasAnalysis::MustAlias) - return false; + // Check for aliasing. + if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) + return false; + + // DCE instructions only used to calculate that store + DeleteDeadInstruction(Dependency, *MD); + ++NumFastStores; + + // Inst's old Dependency is now deleted. Compute the next dependency, + // which may also be dead, as in + // s[0] = 0; + // s[1] = 0; // This has just been deleted. + // free(s); + Dep = MD->getDependency(F); + } while (!Dep.isNonLocal()); - // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency); - ++NumFastStores; return true; } @@ -317,259 +568,163 @@ bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F, /// store i32 1, i32* %A /// ret void bool DSE::handleEndBlock(BasicBlock &BB) { - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - bool MadeChange = false; - // Pointers alloca'd in this function are dead in the end block - SmallPtrSet<Value*, 64> deadPointers; + // Keep track of all of the stack objects that are dead at the end of the + // function. + SmallPtrSet<Value*, 16> DeadStackObjects; // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - deadPointers.insert(AI); + DeadStackObjects.insert(AI); // Treat byval arguments the same, stores to them are dead at the end of the // function. for (Function::arg_iterator AI = BB.getParent()->arg_begin(), AE = BB.getParent()->arg_end(); AI != AE; ++AI) if (AI->hasByValAttr()) - deadPointers.insert(AI); + DeadStackObjects.insert(AI); // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; - // If we find a store whose pointer is dead. - if (doesClobberMemory(BBI)) { - if (isElidable(BBI)) { - // See through pointer-to-pointer bitcasts - Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject(); - - // Alloca'd pointers or byval arguments (which are functionally like - // alloca's) are valid candidates for removal. - if (deadPointers.count(pointerOperand)) { - // DCE instructions only used to calculate that store. 
- Instruction *Dead = BBI; - ++BBI; - DeleteDeadInstruction(Dead, &deadPointers); - ++NumFastStores; - MadeChange = true; - continue; - } - } - - // Because a memcpy or memmove is also a load, we can't skip it if we - // didn't remove it. - if (!isa<MemTransferInst>(BBI)) + // If we find a store, check to see if it points into a dead stack value. + if (hasMemoryWrite(BBI) && isRemovable(BBI)) { + // See through pointer-to-pointer bitcasts + Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI)); + + // Stores to stack values are valid candidates for removal. + if (DeadStackObjects.count(Pointer)) { + Instruction *Dead = BBI++; + + DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " + << *Dead << "\n Object: " << *Pointer << '\n'); + + // DCE instructions only used to calculate that store. + DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); + ++NumFastStores; + MadeChange = true; continue; + } } - Value *killPointer = 0; - uint64_t killPointerSize = ~0UL; + // Remove any dead non-memory-mutating instructions. + if (isInstructionTriviallyDead(BBI)) { + Instruction *Inst = BBI++; + DeleteDeadInstruction(Inst, *MD, &DeadStackObjects); + ++NumFastOther; + MadeChange = true; + continue; + } - // If we encounter a use of the pointer, it is no longer considered dead - if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { - // However, if this load is unused and not volatile, we can go ahead and - // remove it, and not have to worry about it making our pointer undead! - if (L->use_empty() && !L->isVolatile()) { - ++BBI; - DeleteDeadInstruction(L, &deadPointers); - ++NumFastOther; - MadeChange = true; - continue; - } - - killPointer = L->getPointerOperand(); - } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { - killPointer = V->getOperand(0); - } else if (isa<MemTransferInst>(BBI) && - isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) { - killPointer = cast<MemTransferInst>(BBI)->getSource(); - killPointerSize = cast<ConstantInt>( - cast<MemTransferInst>(BBI)->getLength())->getZExtValue(); - } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) { - deadPointers.erase(A); - - // Dead alloca's can be DCE'd when we reach them - if (A->use_empty()) { - ++BBI; - DeleteDeadInstruction(A, &deadPointers); - ++NumFastOther; - MadeChange = true; - } - + if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) { + DeadStackObjects.erase(A); continue; - } else if (CallSite CS = cast<Value>(BBI)) { - // If this call does not access memory, it can't - // be undeadifying any of our pointers. - if (AA.doesNotAccessMemory(CS)) + } + + if (CallSite CS = cast<Value>(BBI)) { + // If this call does not access memory, it can't be loading any of our + // pointers. + if (AA->doesNotAccessMemory(CS)) continue; - unsigned modRef = 0; - unsigned other = 0; + unsigned NumModRef = 0, NumOther = 0; - // Remove any pointers made undead by the call from the dead set - std::vector<Value*> dead; - for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(), - E = deadPointers.end(); I != E; ++I) { - // HACK: if we detect that our AA is imprecise, it's not - // worth it to scan the rest of the deadPointers set. Just - // assume that the AA will return ModRef for everything, and - // go ahead and bail. - if (modRef >= 16 && other == 0) { - deadPointers.clear(); + // If the call might load from any of our allocas, then any store above + // the call is live. 
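// (Conservatively, both Ref and ModRef answers below pull an object out of
// the dead set; only a call that provably does not read the alloca leaves
// it dead.)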
+ SmallVector<Value*, 8> LiveAllocas; + for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), + E = DeadStackObjects.end(); I != E; ++I) { + // If we detect that our AA is imprecise, it's not worth it to scan the + // rest of the DeadPointers set. Just assume that the AA will return + // ModRef for everything, and go ahead and bail out. + if (NumModRef >= 16 && NumOther == 0) return MadeChange; - } - - // See if the call site touches it - AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, - getPointerSize(*I)); + + // See if the call site touches it. + AliasAnalysis::ModRefResult A = + AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); if (A == AliasAnalysis::ModRef) - ++modRef; + ++NumModRef; else - ++other; + ++NumOther; if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) - dead.push_back(*I); + LiveAllocas.push_back(*I); } - - for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end(); - I != E; ++I) - deadPointers.erase(*I); - continue; - } else if (isInstructionTriviallyDead(BBI)) { - // For any non-memory-affecting non-terminators, DCE them as we reach them - Instruction *Inst = BBI; - ++BBI; - DeleteDeadInstruction(Inst, &deadPointers); - ++NumFastOther; - MadeChange = true; + for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), + E = LiveAllocas.end(); I != E; ++I) + DeadStackObjects.erase(*I); + + // If all of the allocas were clobbered by the call then we're not going + // to find anything else to process. + if (DeadStackObjects.empty()) + return MadeChange; + continue; } - if (!killPointer) + AliasAnalysis::Location LoadedLoc; + + // If we encounter a use of the pointer, it is no longer considered dead + if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { + LoadedLoc = AA->getLocation(L); + } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { + LoadedLoc = AA->getLocation(V); + } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { + LoadedLoc = AA->getLocationForSource(MTI); + } else { + // Not a loading instruction. continue; + } - killPointer = killPointer->getUnderlyingObject(); + // Remove any allocas from the DeadPointer set that are loaded, as this + // makes any stores above the access live. + RemoveAccessedObjects(LoadedLoc, DeadStackObjects); - // Deal with undead pointers - MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI, - deadPointers); + // If all of the allocas were clobbered by the access then we're not going + // to find anything else to process. + if (DeadStackObjects.empty()) + break; } return MadeChange; } -/// RemoveUndeadPointers - check for uses of a pointer that make it -/// undead when scanning for dead stores to alloca's. -bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize, - BasicBlock::iterator &BBI, - SmallPtrSet<Value*, 64> &deadPointers) { - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - - // If the kill pointer can be easily reduced to an alloca, - // don't bother doing extraneous AA queries. - if (deadPointers.count(killPointer)) { - deadPointers.erase(killPointer); - return false; - } - - // A global can't be in the dead pointer set. - if (isa<GlobalValue>(killPointer)) - return false; - - bool MadeChange = false; +/// RemoveAccessedObjects - Check to see if the specified location may alias any +/// of the stack objects in the DeadStackObjects set. If so, they become live +/// because the location is being loaded. 
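/// For example (hypothetical), a 'load i32* %q' where %q may alias an alloca
/// in the set means a store to that alloca earlier in the block can no longer
/// be proven dead, so the alloca must be dropped from DeadStackObjects.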
+void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, + SmallPtrSet<Value*, 16> &DeadStackObjects) { + const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr); + + // A constant can't be in the dead pointer set. + if (isa<Constant>(UnderlyingPointer)) + return; - SmallVector<Value*, 16> undead; + // If the kill pointer can be easily reduced to an alloca, don't bother doing + // extraneous AA queries. + if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { + DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer)); + return; + } - for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(), - E = deadPointers.end(); I != E; ++I) { - // See if this pointer could alias it - AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I), - killPointer, killPointerSize); - - // If it must-alias and a store, we can delete it - if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) { - StoreInst *S = cast<StoreInst>(BBI); - - // Remove it! - ++BBI; - DeleteDeadInstruction(S, &deadPointers); - ++NumFastStores; - MadeChange = true; - - continue; - - // Otherwise, it is undead - } else if (A != AliasAnalysis::NoAlias) - undead.push_back(*I); + SmallVector<Value*, 16> NowLive; + for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), + E = DeadStackObjects.end(); I != E; ++I) { + // See if the loaded location could alias the stack location. + AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA)); + if (!AA->isNoAlias(StackLoc, LoadedLoc)) + NowLive.push_back(*I); } - for (SmallVector<Value*, 16>::iterator I = undead.begin(), E = undead.end(); + for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end(); I != E; ++I) - deadPointers.erase(*I); - - return MadeChange; + DeadStackObjects.erase(*I); } -/// DeleteDeadInstruction - Delete this instruction. Before we do, go through -/// and zero out all the operands of this instruction. If any of them become -/// dead, delete them and the computation tree that feeds them. -/// -/// If ValueSet is non-null, remove any deleted instructions from it as well. -/// -void DSE::DeleteDeadInstruction(Instruction *I, - SmallPtrSet<Value*, 64> *ValueSet) { - SmallVector<Instruction*, 32> NowDeadInsts; - - NowDeadInsts.push_back(I); - --NumFastOther; - - // Before we touch this instruction, remove it from memdep! - MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>(); - do { - Instruction *DeadInst = NowDeadInsts.pop_back_val(); - - ++NumFastOther; - - // This instruction is dead, zap it, in stages. Start by removing it from - // MemDep, which needs to know the operands and needs it to be in the - // function. - MDA.removeInstruction(DeadInst); - - for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { - Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); - - // If this operand just became dead, add it to the NowDeadInsts list. 
- if (!Op->use_empty()) continue; - - if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) - NowDeadInsts.push_back(OpI); - } - - DeadInst->eraseFromParent(); - - if (ValueSet) ValueSet->erase(DeadInst); - } while (!NowDeadInsts.empty()); -} - -unsigned DSE::getPointerSize(Value *V) const { - if (TD) { - if (AllocaInst *A = dyn_cast<AllocaInst>(V)) { - // Get size information for the alloca - if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) - return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); - } else { - assert(isa<Argument>(V) && "Expected AllocaInst or Argument!"); - const PointerType *PT = cast<PointerType>(V->getType()); - return TD->getTypeAllocSize(PT->getElementType()); - } - } - return ~0U; -} diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp new file mode 100644 index 0000000..3d3f17b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -0,0 +1,470 @@ +//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs a simple dominator tree walk that eliminates trivially +// redundant instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "early-cse" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/RecyclingAllocator.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd"); +STATISTIC(NumCSE, "Number of instructions CSE'd"); +STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); +STATISTIC(NumCSECall, "Number of call instructions CSE'd"); +STATISTIC(NumDSE, "Number of trivial dead stores removed"); + +static unsigned getHash(const void *V) { + return DenseMapInfo<const void*>::getHashValue(V); +} + +//===----------------------------------------------------------------------===// +// SimpleValue +//===----------------------------------------------------------------------===// + +namespace { + /// SimpleValue - Instances of this struct represent available values in the + /// scoped hash table. + struct SimpleValue { + Instruction *Inst; + + SimpleValue(Instruction *I) : Inst(I) { + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + } + + bool isSentinel() const { + return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || + Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); + } + + static bool canHandle(Instruction *Inst) { + // This can only handle non-void readnone functions. 
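// (readnone calls have no memory state to track, so they can be value
// numbered structurally like any other expression; calls that merely read
// memory instead go through the generation-checked AvailableCalls table
// defined further down.)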
+ if (CallInst *CI = dyn_cast<CallInst>(Inst)) + return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy(); + return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) || + isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) || + isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) || + isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) || + isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst); + } + }; +} + +namespace llvm { +// SimpleValue is POD. +template<> struct isPodLike<SimpleValue> { + static const bool value = true; +}; + +template<> struct DenseMapInfo<SimpleValue> { + static inline SimpleValue getEmptyKey() { + return DenseMapInfo<Instruction*>::getEmptyKey(); + } + static inline SimpleValue getTombstoneKey() { + return DenseMapInfo<Instruction*>::getTombstoneKey(); + } + static unsigned getHashValue(SimpleValue Val); + static bool isEqual(SimpleValue LHS, SimpleValue RHS); +}; +} + +unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) { + Instruction *Inst = Val.Inst; + + // Hash in all of the operands as pointers. + unsigned Res = 0; + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + Res ^= getHash(Inst->getOperand(i)) << i; + + if (CastInst *CI = dyn_cast<CastInst>(Inst)) + Res ^= getHash(CI->getType()); + else if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) + Res ^= CI->getPredicate(); + else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) { + for (ExtractValueInst::idx_iterator I = EVI->idx_begin(), + E = EVI->idx_end(); I != E; ++I) + Res ^= *I; + } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) { + for (InsertValueInst::idx_iterator I = IVI->idx_begin(), + E = IVI->idx_end(); I != E; ++I) + Res ^= *I; + } else { + // nothing extra to hash in. + assert((isa<CallInst>(Inst) || + isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) || + isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) || + isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) && + "Invalid/unknown instruction"); + } + + // Mix in the opcode. + return (Res << 1) ^ Inst->getOpcode(); +} + +bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) { + Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; + + if (LHS.isSentinel() || RHS.isSentinel()) + return LHSI == RHSI; + + if (LHSI->getOpcode() != RHSI->getOpcode()) return false; + return LHSI->isIdenticalTo(RHSI); +} + +//===----------------------------------------------------------------------===// +// CallValue +//===----------------------------------------------------------------------===// + +namespace { + /// CallValue - Instances of this struct represent available call values in + /// the scoped hash table. + struct CallValue { + Instruction *Inst; + + CallValue(Instruction *I) : Inst(I) { + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + } + + bool isSentinel() const { + return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || + Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); + } + + static bool canHandle(Instruction *Inst) { + // Don't value number anything that returns void. + if (Inst->getType()->isVoidTy()) + return false; + + CallInst *CI = dyn_cast<CallInst>(Inst); + if (CI == 0 || !CI->onlyReadsMemory()) + return false; + return true; + } + }; +} + +namespace llvm { + // CallValue is POD. 
+ template<> struct isPodLike<CallValue> { + static const bool value = true; + }; + + template<> struct DenseMapInfo<CallValue> { + static inline CallValue getEmptyKey() { + return DenseMapInfo<Instruction*>::getEmptyKey(); + } + static inline CallValue getTombstoneKey() { + return DenseMapInfo<Instruction*>::getTombstoneKey(); + } + static unsigned getHashValue(CallValue Val); + static bool isEqual(CallValue LHS, CallValue RHS); + }; +} +unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { + Instruction *Inst = Val.Inst; + // Hash in all of the operands as pointers. + unsigned Res = 0; + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) { + assert(!Inst->getOperand(i)->getType()->isMetadataTy() && + "Cannot value number calls with metadata operands"); + Res ^= getHash(Inst->getOperand(i)) << i; + } + + // Mix in the opcode. + return (Res << 1) ^ Inst->getOpcode(); +} + +bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) { + Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; + if (LHS.isSentinel() || RHS.isSentinel()) + return LHSI == RHSI; + return LHSI->isIdenticalTo(RHSI); +} + + +//===----------------------------------------------------------------------===// +// EarlyCSE pass. +//===----------------------------------------------------------------------===// + +namespace { + +/// EarlyCSE - This pass does a simple depth-first walk over the dominator +/// tree, eliminating trivially redundant instructions and using instsimplify +/// to canonicalize things as it goes. It is intended to be fast and catch +/// obvious cases so that instcombine and other passes are more effective. It +/// is expected that a later pass of GVN will catch the interesting/hard +/// cases. +class EarlyCSE : public FunctionPass { +public: + const TargetData *TD; + DominatorTree *DT; + typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy; + typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>, + AllocatorTy> ScopedHTType; + + /// AvailableValues - This scoped hash table contains the current values of + /// all of our simple scalar expressions. As we walk down the domtree, we + /// look to see if instructions are in this: if so, we replace them with what + /// we find, otherwise we insert them so that dominated values can succeed in + /// their lookup. + ScopedHTType *AvailableValues; + + /// AvailableLoads - This scoped hash table contains the current values + /// of loads. This allows us to get efficient access to dominating loads when + /// we have a fully redundant load. In addition to the most recent load, we + /// keep track of a generation count of the read, which is compared against + /// the current generation count. The current generation count is + /// incremented after every possibly writing memory operation, which ensures + /// that we only CSE loads with other loads that have no intervening store. + typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator; + typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>, + DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType; + LoadHTType *AvailableLoads; + + /// AvailableCalls - This scoped hash table contains the current values + /// of read-only call values. It uses the same generation count as loads. + typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType; + CallHTType *AvailableCalls; + + /// CurrentGeneration - This is the current generation of the memory value. 
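  /// For example, a load of %p remembered at generation 0 may only replace a
  /// later load of %p while the generation is still 0; any instruction that
  /// may write memory (and any block entry with multiple predecessors) bumps
  /// the counter, so stale entries are simply ignored rather than erased.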
+ unsigned CurrentGeneration; + + static char ID; + explicit EarlyCSE() : FunctionPass(ID) { + initializeEarlyCSEPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + +private: + + bool processNode(DomTreeNode *Node); + + // This transformation requires dominator info + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); + } +}; } + +char EarlyCSE::ID = 0; + +// createEarlyCSEPass - The public interface to this file. +FunctionPass *llvm::createEarlyCSEPass() { + return new EarlyCSE(); +} + +INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) + +bool EarlyCSE::processNode(DomTreeNode *Node) { + // Define a scope in the scoped hash table. When we are done processing this + // domtree node and recurse back up to our parent domtree node, this will pop + // off all the values we install. + ScopedHTType::ScopeTy Scope(*AvailableValues); + + // Define a scope for the load values so that anything we add will get + // popped when we recurse back up to our parent domtree node. + LoadHTType::ScopeTy LoadScope(*AvailableLoads); + + // Define a scope for the call values so that anything we add will get + // popped when we recurse back up to our parent domtree node. + CallHTType::ScopeTy CallScope(*AvailableCalls); + + BasicBlock *BB = Node->getBlock(); + + // If this block has a single predecessor, then the predecessor is the parent + // of the domtree node and all of the live out memory values are still current + // in this block. If this block has multiple predecessors, then they could + // have invalidated the live-out memory values of our parent value. For now, + // just be conservative and invalidate memory if this block has multiple + // predecessors. + if (BB->getSinglePredecessor() == 0) + ++CurrentGeneration; + + /// LastStore - Keep track of the last non-volatile store that we saw... for + /// as long as there is no instruction that reads memory. If we see a store + /// to the same location, we delete the dead store. This zaps trivial dead + /// stores which can occur in bitfield code among other things. + StoreInst *LastStore = 0; + + bool Changed = false; + + // See if any instructions in the block can be eliminated. If so, do it. If + // not, add them to AvailableValues. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + + // Dead instructions should just be removed. + if (isInstructionTriviallyDead(Inst)) { + DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); + Inst->eraseFromParent(); + Changed = true; + ++NumSimplify; + continue; + } + + // If the instruction can be simplified (e.g. X+0 = X) then replace it with + // its simpler value. + if (Value *V = SimplifyInstruction(Inst, TD, DT)) { + DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); + Inst->replaceAllUsesWith(V); + Inst->eraseFromParent(); + Changed = true; + ++NumSimplify; + continue; + } + + // If this is a simple instruction that we can value number, process it. + if (SimpleValue::canHandle(Inst)) { + // See if the instruction has an available value. If so, use it.
+ if (Value *V = AvailableValues->lookup(Inst)) { + DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n'); + Inst->replaceAllUsesWith(V); + Inst->eraseFromParent(); + Changed = true; + ++NumCSE; + continue; + } + + // Otherwise, just remember that this value is available. + AvailableValues->insert(Inst, Inst); + continue; + } + + // If this is a non-volatile load, process it. + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + // Ignore volatile loads. + if (LI->isVolatile()) { + LastStore = 0; + continue; + } + + // If we have an available version of this load, and if it is the right + // generation, replace this instruction. + std::pair<Value*, unsigned> InVal = + AvailableLoads->lookup(Inst->getOperand(0)); + if (InVal.first != 0 && InVal.second == CurrentGeneration) { + DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: " + << *InVal.first << '\n'); + if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); + Inst->eraseFromParent(); + Changed = true; + ++NumCSELoad; + continue; + } + + // Otherwise, remember that we have this instruction. + AvailableLoads->insert(Inst->getOperand(0), + std::pair<Value*, unsigned>(Inst, CurrentGeneration)); + LastStore = 0; + continue; + } + + // If this instruction may read from memory, forget LastStore. + if (Inst->mayReadFromMemory()) + LastStore = 0; + + // If this is a read-only call, process it. + if (CallValue::canHandle(Inst)) { + // If we have an available version of this call, and if it is the right + // generation, replace this instruction. + std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst); + if (InVal.first != 0 && InVal.second == CurrentGeneration) { + DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: " + << *InVal.first << '\n'); + if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); + Inst->eraseFromParent(); + Changed = true; + ++NumCSECall; + continue; + } + + // Otherwise, remember that we have this instruction. + AvailableCalls->insert(Inst, + std::pair<Value*, unsigned>(Inst, CurrentGeneration)); + continue; + } + + // Okay, this isn't something we can CSE at all. Check to see if it is + // something that could modify memory. If so, our available memory values + // cannot be used so bump the generation count. + if (Inst->mayWriteToMemory()) { + ++CurrentGeneration; + + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // We do a trivial form of DSE if there are two stores to the same + // location with no intervening loads. Delete the earlier store. + if (LastStore && + LastStore->getPointerOperand() == SI->getPointerOperand()) { + DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " + << *Inst << '\n'); + LastStore->eraseFromParent(); + Changed = true; + ++NumDSE; + LastStore = 0; + continue; + } + + // Okay, we just invalidated anything we knew about loaded values. Try + // to salvage *something* by remembering that the stored value is a live + // version of the pointer. It is safe to forward from volatile stores + // to non-volatile loads, so we don't have to check for volatility of + // the store. + AvailableLoads->insert(SI->getPointerOperand(), + std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration)); + + // Remember that this was the last store we saw for DSE. 
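        // For example (hypothetical IR), two back-to-back bitfield writes
        //   store i32 %a, i32* %f
        //   store i32 %b, i32* %f
        // with no intervening read let the first store be deleted above. Note
        // that the pointer operands must be literally identical; no aliasing
        // query is made here.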
+ if (!SI->isVolatile()) + LastStore = SI; + } + } + } + + unsigned LiveOutGeneration = CurrentGeneration; + for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) { + Changed |= processNode(*I); + // Pop any generation changes off the stack from the recursive walk. + CurrentGeneration = LiveOutGeneration; + } + return Changed; +} + + +bool EarlyCSE::runOnFunction(Function &F) { + TD = getAnalysisIfAvailable<TargetData>(); + DT = &getAnalysis<DominatorTree>(); + + // Tables that the pass uses when walking the domtree. + ScopedHTType AVTable; + AvailableValues = &AVTable; + LoadHTType LoadTable; + AvailableLoads = &LoadTable; + CallHTType CallTable; + AvailableCalls = &CallTable; + + CurrentGeneration = 0; + return processNode(DT->getRootNode()); +} diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp index 53dd06d..4c3d188 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp @@ -27,13 +27,15 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const; public: static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(ID) {} + explicit GEPSplitter() : FunctionPass(ID) { + initializeGEPSplitterPass(*PassRegistry::getPassRegistry()); + } }; } char GEPSplitter::ID = 0; INITIALIZE_PASS(GEPSplitter, "split-geps", - "split complex GEPs into simple GEPs", false, false); + "split complex GEPs into simple GEPs", false, false) FunctionPass *llvm::createGEPSplitterPass() { return new GEPSplitter(); diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index c62ce1f..a0123f5 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -17,39 +17,30 @@ #define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" -#include "llvm/BasicBlock.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" -#include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" -#include "llvm/Operator.h" -#include "llvm/Value.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/PHITransAddr.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include 
"llvm/Support/IRBuilder.h" using namespace llvm; STATISTIC(NumGVNInstr, "Number of instructions deleted"); @@ -61,7 +52,6 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt<bool> EnablePRE("enable-pre", cl::init(true), cl::Hidden); static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); -static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false)); //===----------------------------------------------------------------------===// // ValueTable Class @@ -72,76 +62,23 @@ static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false)); /// two values. namespace { struct Expression { - enum ExpressionOpcode { - ADD = Instruction::Add, - FADD = Instruction::FAdd, - SUB = Instruction::Sub, - FSUB = Instruction::FSub, - MUL = Instruction::Mul, - FMUL = Instruction::FMul, - UDIV = Instruction::UDiv, - SDIV = Instruction::SDiv, - FDIV = Instruction::FDiv, - UREM = Instruction::URem, - SREM = Instruction::SRem, - FREM = Instruction::FRem, - SHL = Instruction::Shl, - LSHR = Instruction::LShr, - ASHR = Instruction::AShr, - AND = Instruction::And, - OR = Instruction::Or, - XOR = Instruction::Xor, - TRUNC = Instruction::Trunc, - ZEXT = Instruction::ZExt, - SEXT = Instruction::SExt, - FPTOUI = Instruction::FPToUI, - FPTOSI = Instruction::FPToSI, - UITOFP = Instruction::UIToFP, - SITOFP = Instruction::SIToFP, - FPTRUNC = Instruction::FPTrunc, - FPEXT = Instruction::FPExt, - PTRTOINT = Instruction::PtrToInt, - INTTOPTR = Instruction::IntToPtr, - BITCAST = Instruction::BitCast, - ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, - FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, - SHUFFLE, SELECT, GEP, CALL, CONSTANT, - INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; - - ExpressionOpcode opcode; + uint32_t opcode; const Type* type; SmallVector<uint32_t, 4> varargs; - Value *function; Expression() { } - Expression(ExpressionOpcode o) : opcode(o) { } + Expression(uint32_t o) : opcode(o) { } bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; - else if (opcode == EMPTY || opcode == TOMBSTONE) + else if (opcode == ~0U || opcode == ~1U) return true; else if (type != other.type) return false; - else if (function != other.function) + else if (varargs != other.varargs) return false; - else { - if (varargs.size() != other.varargs.size()) - return false; - - for (size_t i = 0; i < varargs.size(); ++i) - if (varargs[i] != other.varargs[i]) - return false; - - return true; - } - } - - bool operator!=(const Expression &other) const { - return !(*this == other); + return true; } }; @@ -155,19 +92,7 @@ namespace { uint32_t nextValueNumber; - Expression::ExpressionOpcode getOpcode(CmpInst* C); - Expression create_expression(BinaryOperator* BO); - Expression create_expression(CmpInst* C); - Expression create_expression(ShuffleVectorInst* V); - Expression create_expression(ExtractElementInst* C); - Expression create_expression(InsertElementInst* V); - Expression create_expression(SelectInst* V); - Expression create_expression(CastInst* C); - Expression create_expression(GetElementPtrInst* G); - Expression create_expression(CallInst* C); - Expression create_expression(ExtractValueInst* C); - Expression create_expression(InsertValueInst* C); - + Expression create_expression(Instruction* I); uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } @@ -176,7 
+101,6 @@ namespace { void add(Value *V, uint32_t num); void clear(); void erase(Value *v); - unsigned size(); void setAliasAnalysis(AliasAnalysis* A) { AA = A; } AliasAnalysis *getAliasAnalysis() const { return AA; } void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } @@ -189,11 +113,11 @@ namespace { namespace llvm { template <> struct DenseMapInfo<Expression> { static inline Expression getEmptyKey() { - return Expression(Expression::EMPTY); + return ~0U; } static inline Expression getTombstoneKey() { - return Expression(Expression::TOMBSTONE); + return ~1U; } static unsigned getHashValue(const Expression e) { @@ -205,20 +129,13 @@ template <> struct DenseMapInfo<Expression> { for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) hash = *I + hash * 37; - - hash = ((unsigned)((uintptr_t)e.function >> 4) ^ - (unsigned)((uintptr_t)e.function >> 9)) + - hash * 37; - + return hash; } static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } }; - -template <> -struct isPodLike<Expression> { static const bool value = true; }; } @@ -226,185 +143,27 @@ struct isPodLike<Expression> { static const bool value = true; }; // ValueTable Internal Functions //===----------------------------------------------------------------------===// -Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { - if (isa<ICmpInst>(C)) { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; - case ICmpInst::ICMP_NE: return Expression::ICMPNE; - case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; - case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; - case ICmpInst::ICMP_ULT: return Expression::ICMPULT; - case ICmpInst::ICMP_ULE: return Expression::ICMPULE; - case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; - case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; - case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; - case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; - } - } else { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; - case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; - case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; - case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; - case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; - case FCmpInst::FCMP_ONE: return Expression::FCMPONE; - case FCmpInst::FCMP_ORD: return Expression::FCMPORD; - case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; - case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; - case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; - case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; - case FCmpInst::FCMP_ULT: return Expression::FCMPULT; - case FCmpInst::FCMP_ULE: return Expression::FCMPULE; - case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; - } - } -} - -Expression ValueTable::create_expression(CallInst* C) { - Expression e; - - e.type = C->getType(); - e.function = C->getCalledFunction(); - e.opcode = Expression::CALL; - - CallSite CS(C); - for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - e.varargs.push_back(lookup_or_add(*I)); - - return e; -} - -Expression ValueTable::create_expression(BinaryOperator* BO) { - Expression e; - e.varargs.push_back(lookup_or_add(BO->getOperand(0))); - e.varargs.push_back(lookup_or_add(BO->getOperand(1))); - 
e.function = 0; - e.type = BO->getType(); - e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode()); - - return e; -} - -Expression ValueTable::create_expression(CmpInst* C) { - Expression e; - - e.varargs.push_back(lookup_or_add(C->getOperand(0))); - e.varargs.push_back(lookup_or_add(C->getOperand(1))); - e.function = 0; - e.type = C->getType(); - e.opcode = getOpcode(C); - - return e; -} - -Expression ValueTable::create_expression(CastInst* C) { - Expression e; - - e.varargs.push_back(lookup_or_add(C->getOperand(0))); - e.function = 0; - e.type = C->getType(); - e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode()); - - return e; -} - -Expression ValueTable::create_expression(ShuffleVectorInst* S) { - Expression e; - - e.varargs.push_back(lookup_or_add(S->getOperand(0))); - e.varargs.push_back(lookup_or_add(S->getOperand(1))); - e.varargs.push_back(lookup_or_add(S->getOperand(2))); - e.function = 0; - e.type = S->getType(); - e.opcode = Expression::SHUFFLE; - - return e; -} -Expression ValueTable::create_expression(ExtractElementInst* E) { +Expression ValueTable::create_expression(Instruction *I) { Expression e; - - e.varargs.push_back(lookup_or_add(E->getOperand(0))); - e.varargs.push_back(lookup_or_add(E->getOperand(1))); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::EXTRACT; - - return e; -} - -Expression ValueTable::create_expression(InsertElementInst* I) { - Expression e; - - e.varargs.push_back(lookup_or_add(I->getOperand(0))); - e.varargs.push_back(lookup_or_add(I->getOperand(1))); - e.varargs.push_back(lookup_or_add(I->getOperand(2))); - e.function = 0; e.type = I->getType(); - e.opcode = Expression::INSERT; - - return e; -} - -Expression ValueTable::create_expression(SelectInst* I) { - Expression e; - - e.varargs.push_back(lookup_or_add(I->getCondition())); - e.varargs.push_back(lookup_or_add(I->getTrueValue())); - e.varargs.push_back(lookup_or_add(I->getFalseValue())); - e.function = 0; - e.type = I->getType(); - e.opcode = Expression::SELECT; - - return e; -} - -Expression ValueTable::create_expression(GetElementPtrInst* G) { - Expression e; - - e.varargs.push_back(lookup_or_add(G->getPointerOperand())); - e.function = 0; - e.type = G->getType(); - e.opcode = Expression::GEP; - - for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); - I != E; ++I) - e.varargs.push_back(lookup_or_add(*I)); - - return e; -} - -Expression ValueTable::create_expression(ExtractValueInst* E) { - Expression e; - - e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); - for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::EXTRACTVALUE; - - return e; -} - -Expression ValueTable::create_expression(InsertValueInst* E) { - Expression e; - - e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); - e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand())); - for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::INSERTVALUE; - + e.opcode = I->getOpcode(); + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + e.varargs.push_back(lookup_or_add(*OI)); + + if (CmpInst *C = dyn_cast<CmpInst>(I)) + e.opcode = (C->getOpcode() << 8) | C->getPredicate(); + else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) { + 
for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { + for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + } + return e; } @@ -563,12 +322,8 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::And: case Instruction::Or : case Instruction::Xor: - exp = create_expression(cast<BinaryOperator>(I)); - break; case Instruction::ICmp: case Instruction::FCmp: - exp = create_expression(cast<CmpInst>(I)); - break; case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -581,28 +336,14 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: - exp = create_expression(cast<CastInst>(I)); - break; case Instruction::Select: - exp = create_expression(cast<SelectInst>(I)); - break; case Instruction::ExtractElement: - exp = create_expression(cast<ExtractElementInst>(I)); - break; case Instruction::InsertElement: - exp = create_expression(cast<InsertElementInst>(I)); - break; case Instruction::ShuffleVector: - exp = create_expression(cast<ShuffleVectorInst>(I)); - break; case Instruction::ExtractValue: - exp = create_expression(cast<ExtractValueInst>(I)); - break; case Instruction::InsertValue: - exp = create_expression(cast<InsertValueInst>(I)); - break; case Instruction::GetElementPtr: - exp = create_expression(cast<GetElementPtrInst>(I)); + exp = create_expression(I); break; default: valueNumbering[V] = nextValueNumber; @@ -649,30 +390,76 @@ void ValueTable::verifyRemoved(const Value *V) const { //===----------------------------------------------------------------------===// namespace { - struct ValueNumberScope { - ValueNumberScope* parent; - DenseMap<uint32_t, Value*> table; - - ValueNumberScope(ValueNumberScope* p) : parent(p) { } - }; -} - -namespace { class GVN : public FunctionPass { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { } + : FunctionPass(ID), NoLoads(noloads), MD(0) { + initializeGVNPass(*PassRegistry::getPassRegistry()); + } private: bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; + const TargetData* TD; ValueTable VN; - DenseMap<BasicBlock*, ValueNumberScope*> localAvail; + + /// LeaderTable - A mapping from value numbers to lists of Value*'s that + /// have that value number. Use findLeader to query it. + struct LeaderTableEntry { + Value *Val; + BasicBlock *BB; + LeaderTableEntry *Next; + }; + DenseMap<uint32_t, LeaderTableEntry> LeaderTable; + BumpPtrAllocator TableAllocator; + + /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for + /// its value number. + void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + LeaderTableEntry& Curr = LeaderTable[N]; + if (!Curr.Val) { + Curr.Val = V; + Curr.BB = BB; + return; + } + + LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>(); + Node->Val = V; + Node->BB = BB; + Node->Next = Curr.Next; + Curr.Next = Node; + } + + /// removeFromLeaderTable - Scan the list of values corresponding to a given + /// value number, and remove the given value if encountered. 
+ void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + LeaderTableEntry* Prev = 0; + LeaderTableEntry* Curr = &LeaderTable[N]; + + while (Curr->Val != V || Curr->BB != BB) { + Prev = Curr; + Curr = Curr->Next; + } + + if (Prev) { + Prev->Next = Curr->Next; + } else { + if (!Curr->Next) { + Curr->Val = 0; + Curr->BB = 0; + } else { + LeaderTableEntry* Next = Curr->Next; + Curr->Val = Next->Val; + Curr->BB = Next->BB; + Curr->Next = Next->Next; + } + } + } // List of critical edges to be split between iterations. SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; @@ -699,9 +486,8 @@ namespace { bool processBlock(BasicBlock *BB); void dump(DenseMap<uint32_t, Value*>& d); bool iterateOnFunction(Function &F); - Value *CollapsePhi(PHINode* p); bool performPRE(Function& F); - Value *lookupNumber(BasicBlock *BB, uint32_t num); + Value *findLeader(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); @@ -715,7 +501,11 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { return new GVN(NoLoads); } -INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false); +INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; @@ -727,33 +517,6 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "}\n"; } -static bool isSafeReplacement(PHINode* p, Instruction *inst) { - if (!isa<PHINode>(inst)) - return true; - - for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); - UI != E; ++UI) - if (PHINode* use_phi = dyn_cast<PHINode>(*UI)) - if (use_phi->getParent() == inst->getParent()) - return false; - - return true; -} - -Value *GVN::CollapsePhi(PHINode *PN) { - Value *ConstVal = PN->hasConstantValue(DT); - if (!ConstVal) return 0; - - Instruction *Inst = dyn_cast<Instruction>(ConstVal); - if (!Inst) - return ConstVal; - - if (DT->dominates(Inst, PN)) - if (isSafeReplacement(PN, Inst)) - return Inst; - return 0; -} - /// IsValueFullyAvailableInBlock - Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep /// track of which blocks we know are fully alive in FullyAvailableBlocks. This @@ -937,47 +700,6 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); } -/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can -/// be expressed as a base pointer plus a constant offset. Return the base and -/// offset to the caller. -static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const TargetData &TD) { - Operator *PtrOp = dyn_cast<Operator>(Ptr); - if (PtrOp == 0) return Ptr; - - // Just look through bitcasts. - if (PtrOp->getOpcode() == Instruction::BitCast) - return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); - - // If this is a GEP with constant indices, we can look through it. 
- GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp); - if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; - ++I, ++GTI) { - ConstantInt *OpC = cast<ConstantInt>(*I); - if (OpC->isZero()) continue; - - // Handle a struct and array indices which add their offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += OpC->getSExtValue()*Size; - } - } - - // Re-sign extend from the pointer size if needed to get overflow edge cases - // right. - unsigned PtrSize = TD.getPointerSizeInBits(); - if (PtrSize < 64) - Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); - - return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); -} - - /// AnalyzeLoadFromClobberingWrite - This function is called when we have a /// memdep query of a load that ends up being a clobbering memory write (store, /// memset, memcpy, memmove). This means that the write *may* provide bits used @@ -996,9 +718,8 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD); - Value *LoadBase = - GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD); + Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); if (StoreBase != LoadBase) return -1; @@ -1020,8 +741,6 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, // If the load and store don't overlap at all, the store doesn't provide // anything to the load. In this case, they really don't alias at all, AA // must have gotten confused. - // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then - // remove this check, as it is duplicated with what we have below. uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); if ((WriteSizeInBits & 7) | (LoadSize & 7)) @@ -1067,12 +786,12 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const TargetData &TD) { // Cannot handle reading from store of first-class aggregate yet. - if (DepSI->getOperand(0)->getType()->isStructTy() || - DepSI->getOperand(0)->getType()->isArrayTy()) + if (DepSI->getValueOperand()->getType()->isStructTy() || + DepSI->getValueOperand()->getType()->isArrayTy()) return -1; Value *StorePtr = DepSI->getPointerOperand(); - uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType()); return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize, TD); } @@ -1099,7 +818,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, Constant *Src = dyn_cast<Constant>(MTI->getSource()); if (Src == 0) return -1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject()); + GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD)); if (GV == 0 || !GV->isConstant()) return -1; // See if the access is within the bounds of the transfer. 
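// The AnalyzeLoadFromClobbering* routines in this hunk reduce store-to-load
// forwarding to arithmetic on (base, constant offset) pairs. A minimal sketch
// of the containment test they build on, with sizes passed in directly rather
// than taken from TargetData (illustrative names only):
#include <cstdint>

static bool loadCoveredByWrite(std::int64_t LoadOffset, std::uint64_t LoadSize,
                               std::int64_t WriteOffset,
                               std::uint64_t WriteSize) {
  // Forwarding is possible only when every byte the load reads was produced
  // by the write; partial overlap leaves the load with bytes the write never
  // set, and the routines above return -1 for that case.
  return LoadOffset >= WriteOffset &&
         std::uint64_t(LoadOffset - WriteOffset) + LoadSize <= WriteSize;
}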
@@ -1331,6 +1050,15 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (V->getType()->isPointerTy()) for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); + + // Now that we've copied information to the new PHIs, scan through + // them again and inform alias analysis that we've added potentially + // escaping uses to any values that are operands to these PHIs. + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { + PHINode *P = NewPHIs[i]; + for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) + AA->addEscapingUse(P->getOperandUse(2*ii)); + } return V; } @@ -1347,8 +1075,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl<Instruction*> &toErase) { // Find the non-local dependencies of the load. SmallVector<NonLocalDepResult, 64> Deps; - MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), - Deps); + AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); + MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " // << Deps.size() << *LI << '\n'); @@ -1376,8 +1104,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI, SmallVector<AvailableValueInBlock, 16> ValuesPerBlock; SmallVector<BasicBlock*, 16> UnavailableBlocks; - const TargetData *TD = 0; - for (unsigned i = 0, e = Deps.size(); i != e; ++i) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); @@ -1392,14 +1118,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // read by the load, we can extract the bits we need for the load from the // stored value. if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { - if (TD == 0) - TD = getAnalysisIfAvailable<TargetData>(); if (TD && Address) { int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, *TD); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - DepSI->getOperand(0), + DepSI->getValueOperand(), Offset)); continue; } @@ -1409,8 +1133,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { - if (TD == 0) - TD = getAnalysisIfAvailable<TargetData>(); if (TD && Address) { int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, DepMI, *TD); @@ -1440,13 +1162,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. - if (S->getOperand(0)->getType() != LI->getType()) { - if (TD == 0) - TD = getAnalysisIfAvailable<TargetData>(); - + if (S->getValueOperand()->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0), + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(), *TD)) { UnavailableBlocks.push_back(DepBB); continue; @@ -1454,16 +1173,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - S->getOperand(0))); + S->getValueOperand())); continue; } if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { // If the types mismatch and we can't handle it, reject reuse of the load. 
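// Sketch of the width rule the type-mismatch checks in this hunk enforce via
// CanCoerceMustAliasedValueToLoad: a remembered value can satisfy a load of a
// different type only if it is at least as wide, since a narrower value
// cannot supply all of the loaded bits. Bit widths are passed in directly
// here for illustration; the real check also consults TargetData.
#include <cstdint>

static bool canCoerceWidths(std::uint64_t StoredBits,
                            std::uint64_t LoadedBits) {
  return StoredBits >= LoadedBits;
}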
if (LD->getType() != LI->getType()) { - if (TD == 0) - TD = getAnalysisIfAvailable<TargetData>(); - // If the stored value is larger or equal to the loaded value, we can // reuse it. if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ @@ -1533,26 +1249,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI, return false; if (Blockers.count(TmpBB)) return false; + + // If any of these blocks has more than one successor (i.e. if the edge we + // just traversed was critical), then there are other paths through this + // block along which the load may not be anticipated. Hoisting the load + // above this block would be adding the load to execution paths along + // which it was not previously executed. if (TmpBB->getTerminator()->getNumSuccessors() != 1) - allSingleSucc = false; + return false; } assert(TmpBB); LoadBB = TmpBB; - // If we have a repl set with LI itself in it, this means we have a loop where - // at least one of the values is LI. Since this means that we won't be able - // to eliminate LI even if we insert uses in the other predecessors, we will - // end up increasing code size. Reject this by scanning for LI. - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - if (ValuesPerBlock[i].isSimpleValue() && - ValuesPerBlock[i].getSimpleValue() == LI) { - // Skip cases where LI is the only definition, even for EnableFullLoadPRE. - if (!EnableFullLoadPRE || e == 1) - return false; - } - } - // FIXME: It is extremely unclear what this loop is doing, other than // artificially restricting loadpre. if (isSinglePred) { @@ -1612,14 +1321,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI, unsigned NumUnavailablePreds = PredLoads.size(); assert(NumUnavailablePreds != 0 && "Fully available value should be eliminated above!"); - if (!EnableFullLoadPRE) { - // If this load is unavailable in multiple predecessors, reject it. - // FIXME: If we could restructure the CFG, we could make a common pred with - // all the preds that don't have an available LI and insert a new load into - // that one block. - if (NumUnavailablePreds != 1) + + // If this load is unavailable in multiple predecessors, reject it. + // FIXME: If we could restructure the CFG, we could make a common pred with + // all the preds that don't have an available LI and insert a new load into + // that one block. + if (NumUnavailablePreds != 1) return false; - } // Check if the load can safely be moved to all the unavailable predecessors. bool CanDoPRE = true; @@ -1634,7 +1342,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If all preds have a single successor, then we know it is safe to insert // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. - PHITransAddr Address(LI->getOperand(0), TD); + PHITransAddr Address(LI->getPointerOperand(), TD); Value *LoadPtr = 0; if (allSingleSucc) { LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, @@ -1648,7 +1356,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // we fail PRE. if (LoadPtr == 0) { DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " - << *LI->getOperand(0) << "\n"); + << *LI->getPointerOperand() << "\n"); CanDoPRE = false; break; } @@ -1657,8 +1365,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // @1 = getelementptr (i8* p, ... // test p and branch if == 0 // load @1 - // It is valid to have the getelementptr before the test, even if p can be 0, - // as getelementptr only does address arithmetic. 
+ // It is valid to have the getelementptr before the test, even if p can + // be 0, as getelementptr only does address arithmetic. // If we are not pushing the value through any multiple-successor blocks // we do not have this case. Otherwise, check that the load is safe to // put anywhere; this can be improved, but should be conservatively safe. @@ -1675,8 +1383,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } if (!CanDoPRE) { - while (!NewInsts.empty()) - NewInsts.pop_back_val()->eraseFromParent(); + while (!NewInsts.empty()) { + Instruction *I = NewInsts.pop_back_val(); + if (MD) MD->removeInstruction(I); + I->eraseFromParent(); + } return false; } @@ -1702,9 +1413,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI, BasicBlock *UnavailablePred = I->first; Value *LoadPtr = I->second; - Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, - LI->getAlignment(), - UnavailablePred->getTerminator()); + Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, + LI->getAlignment(), + UnavailablePred->getTerminator()); + + // Transfer the old load's TBAA tag to the new load. + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) + NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag); // Add the newly created load. ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, @@ -1753,19 +1468,19 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // access code. Value *AvailVal = 0; if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) - if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { + if (TD) { int Offset = AnalyzeLoadFromClobberingStore(L->getType(), L->getPointerOperand(), DepSI, *TD); if (Offset != -1) - AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, L->getType(), L, *TD); } // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { - if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { + if (TD) { int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), L->getPointerOperand(), DepMI, *TD); @@ -1804,14 +1519,13 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { - Value *StoredVal = DepSI->getOperand(0); + Value *StoredVal = DepSI->getValueOperand(); // The store and load are to a must-aliased pointer, but they may not // actually have the same type. See if we know how to reuse the stored // value (depending on its type). - const TargetData *TD = 0; if (StoredVal->getType() != L->getType()) { - if ((TD = getAnalysisIfAvailable<TargetData>())) { + if (TD) { StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, *TD); if (StoredVal == 0) @@ -1840,9 +1554,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // The loads are of a must-aliased pointer, but they may not actually have // the same type. See if we know how to reuse the previously loaded value // (depending on its type). 
- const TargetData *TD = 0; if (DepLI->getType() != L->getType()) { - if ((TD = getAnalysisIfAvailable<TargetData>())) { + if (TD) { AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); if (AvailableVal == 0) return false; @@ -1890,20 +1603,32 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { return false; } -Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { - DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB); - if (I == localAvail.end()) - return 0; - - ValueNumberScope *Locals = I->second; - while (Locals) { - DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num); - if (I != Locals->table.end()) - return I->second; - Locals = Locals->parent; +// findLeader - In order to find a leader for a given value number at a +// specific basic block, we first obtain the list of all Values for that number, +// and then scan the list to find one whose block dominates the block in +// question. This is fast because dominator tree queries consist of only +// a few comparisons of DFS numbers. +Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { + LeaderTableEntry Vals = LeaderTable[num]; + if (!Vals.Val) return 0; + + Value *Val = 0; + if (DT->dominates(Vals.BB, BB)) { + Val = Vals.Val; + if (isa<Constant>(Val)) return Val; + } + + LeaderTableEntry* Next = Vals.Next; + while (Next) { + if (DT->dominates(Next->BB, BB)) { + if (isa<Constant>(Next->Val)) return Next->Val; + if (!Val) Val = Next->Val; + } + + Next = Next->Next; } - return 0; + return Val; } @@ -1915,85 +1640,92 @@ bool GVN::processInstruction(Instruction *I, if (isa<DbgInfoIntrinsic>(I)) return false; + // If the instruction can be easily simplified then do so now in preference + // to value numbering it. Value numbering often exposes redundancies, for + // example if it determines that %y is equal to %x then the instruction + // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. + if (Value *V = SimplifyInstruction(I, TD, DT)) { + I->replaceAllUsesWith(V); + if (MD && V->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + VN.erase(I); + toErase.push_back(I); + return true; + } + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { bool Changed = processLoad(LI, toErase); if (!Changed) { unsigned Num = VN.lookup_or_add(LI); - localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI)); + addToLeaderTable(Num, LI, LI->getParent()); } return Changed; } - uint32_t NextNum = VN.getNextUnusedValueNumber(); - unsigned Num = VN.lookup_or_add(I); - + // For conditional branches, we can perform simple conditional propagation on + // the condition value itself. 
if (BranchInst *BI = dyn_cast<BranchInst>(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - if (!BI->isConditional() || isa<Constant>(BI->getCondition())) return false; - + Value *BranchCond = BI->getCondition(); uint32_t CondVN = VN.lookup_or_add(BranchCond); - + BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); - + if (TrueSucc->getSinglePredecessor()) - localAvail[TrueSucc]->table[CondVN] = - ConstantInt::getTrue(TrueSucc->getContext()); + addToLeaderTable(CondVN, + ConstantInt::getTrue(TrueSucc->getContext()), + TrueSucc); if (FalseSucc->getSinglePredecessor()) - localAvail[FalseSucc]->table[CondVN] = - ConstantInt::getFalse(TrueSucc->getContext()); - + addToLeaderTable(CondVN, + ConstantInt::getFalse(TrueSucc->getContext()), + FalseSucc); + return false; + } + + // Instructions with void type don't return a value, so there's + // no point in trying to find redundancies in them. + if (I->getType()->isVoidTy()) return false; + + uint32_t NextNum = VN.getNextUnusedValueNumber(); + unsigned Num = VN.lookup_or_add(I); // Allocations are always uniquely numbered, so we can save time and memory // by fast failing them. - } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) { + addToLeaderTable(Num, I, I->getParent()); return false; } - // Collapse PHI nodes - if (PHINode* p = dyn_cast<PHINode>(I)) { - Value *constVal = CollapsePhi(p); - - if (constVal) { - p->replaceAllUsesWith(constVal); - if (MD && constVal->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(constVal); - VN.erase(p); - - toErase.push_back(p); - } else { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - } - // If the number we were assigned was a brand new VN, then we don't // need to do a lookup to see if the number already exists // somewhere in the domtree: it can't! - } else if (Num == NextNum) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - + if (Num == NextNum) { + addToLeaderTable(Num, I, I->getParent()); + return false; + } + // Perform fast-path value-number based elimination of values inherited from // dominators. - } else if (Value *repl = lookupNumber(I->getParent(), Num)) { - // Remove it! - VN.erase(I); - I->replaceAllUsesWith(repl); - if (MD && repl->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); - return true; - - } else { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + Value *repl = findLeader(I->getParent(), Num); + if (repl == 0) { + // Failure, just remember this instance for future use. + addToLeaderTable(Num, I, I->getParent()); + return false; } - - return false; + + // Remove it! + VN.erase(I); + I->replaceAllUsesWith(repl); + if (MD && repl->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(repl); + toErase.push_back(I); + return true; } /// runOnFunction - This is the main transformation entry point for a function. @@ -2001,6 +1733,7 @@ bool GVN::runOnFunction(Function& F) { if (!NoLoads) MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); VN.setDomTree(DT); @@ -2011,8 +1744,8 @@ bool GVN::runOnFunction(Function& F) { // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. 
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock *BB = FI; - ++FI; + BasicBlock *BB = FI++; + bool removedBlock = MergeBlockIntoPredecessor(BB, this); if (removedBlock) ++NumGVNBlocks; @@ -2020,7 +1753,6 @@ bool GVN::runOnFunction(Function& F) { } unsigned Iteration = 0; - while (ShouldContinue) { DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); ShouldContinue = iterateOnFunction(F); @@ -2138,20 +1870,19 @@ bool GVN::performPRE(Function &F) { if (P == CurrentBlock) { NumWithout = 2; break; - } else if (!localAvail.count(P)) { + } else if (!DT->dominates(&F.getEntryBlock(), P)) { NumWithout = 2; break; } - DenseMap<uint32_t, Value*>::iterator predV = - localAvail[P]->table.find(ValNo); - if (predV == localAvail[P]->table.end()) { + Value* predV = findLeader(P, ValNo); + if (predV == 0) { PREPred = P; ++NumWithout; - } else if (predV->second == CurInst) { + } else if (predV == CurInst) { NumWithout = 2; } else { - predMap[P] = predV->second; + predMap[P] = predV; ++NumWith; } } @@ -2186,7 +1917,7 @@ bool GVN::performPRE(Function &F) { if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) continue; - if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { + if (Value *V = findLeader(PREPred, VN.lookup(Op))) { PREInstr->setOperand(i, V); } else { success = false; @@ -2210,7 +1941,7 @@ bool GVN::performPRE(Function &F) { ++NumGVNPRE; // Update the availability map to include the new instruction. - localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); + addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. PHINode* Phi = PHINode::Create(CurInst->getType(), @@ -2223,12 +1954,21 @@ bool GVN::performPRE(Function &F) { } VN.add(Phi, ValNo); - localAvail[CurrentBlock]->table[ValNo] = Phi; + addToLeaderTable(ValNo, Phi, CurrentBlock); CurInst->replaceAllUsesWith(Phi); - if (MD && Phi->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(Phi); + if (Phi->getType()->isPointerTy()) { + // Because we have added a PHI-use of the pointer value, it has now + // "escaped" from alias analysis' perspective. We need to inform + // AA of this. 
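// Note on the getOperandUse(2*ii) stride used just below: in this version of
// the IR, a PHI node's operand list interleaves its incoming pairs as
// [ value0, block0, value1, block1, ... ], so incoming value i sits at
// operand index 2*i. A sketch of that indexing, with hypothetical helper
// names:
static unsigned incomingValueOperandIdx(unsigned i) { return 2 * i; }
static unsigned incomingBlockOperandIdx(unsigned i) { return 2 * i + 1; }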
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) + VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii)); + + if (MD) + MD->invalidateCachedPointerInfo(Phi); + } VN.erase(CurInst); + removeFromLeaderTable(ValNo, CurInst, CurrentBlock); DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); @@ -2260,16 +2000,7 @@ bool GVN::splitCriticalEdges() { /// iterateOnFunction - Executes one iteration of GVN bool GVN::iterateOnFunction(Function &F) { cleanupGlobalSets(); - - for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) { - if (DI->getIDom()) - localAvail[DI->getBlock()] = - new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]); - else - localAvail[DI->getBlock()] = new ValueNumberScope(0); - } - + // Top-down walk of the dominator tree bool Changed = false; #if 0 @@ -2289,11 +2020,8 @@ bool GVN::iterateOnFunction(Function &F) { void GVN::cleanupGlobalSets() { VN.clear(); - - for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator - I = localAvail.begin(), E = localAvail.end(); I != E; ++I) - delete I->second; - localAvail.clear(); + LeaderTable.clear(); + TableAllocator.Reset(); } /// verifyRemoved - Verify that the specified instruction does not occur in our @@ -2303,17 +2031,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const { // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. - for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator - I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { - const ValueNumberScope *VNS = I->second; - - while (VNS) { - for (DenseMap<uint32_t, Value*>::const_iterator - II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { - assert(II->second != Inst && "Inst still in value numbering scope!"); - } - - VNS = VNS->parent; + for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator + I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { + const LeaderTableEntry *Node = &I->second; + assert(Node->Val != Inst && "Inst still in value numbering scope!"); + + while (Node->Next) { + Node = Node->Next; + assert(Node->Val != Inst && "Inst still in value numbering scope!"); } } } diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index af2eafc..0fb6798 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -77,7 +77,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID) {} + IndVarSimplify() : LoopPass(ID) { + initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -117,8 +119,16 @@ namespace { } char IndVarSimplify::ID = 0; -INITIALIZE_PASS(IndVarSimplify, "indvars", - "Canonicalize Induction Variables", false, false); +INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", + "Canonicalize Induction Variables", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(IVUsers) +INITIALIZE_PASS_END(IndVarSimplify, "indvars", + "Canonicalize Induction Variables", false, false) Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); @@ -190,7 +200,7 @@ ICmpInst 
*IndVarSimplify::LinearFunctionTestReplace(Loop *L, } // Expand the code for the iteration count. - assert(RHS->isLoopInvariant(L) && + assert(SE->isLoopInvariant(RHS, L) && "Computed iteration count is not loop invariant!"); Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); @@ -233,8 +243,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, /// happen later, except that it's more powerful in some cases, because it's /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. -void IndVarSimplify::RewriteLoopExitValues(Loop *L, - SCEVExpander &Rewriter) { +void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Verify the input to the pass in already in LCSSA form. assert(L->isLCSSAForm(*DT)); @@ -292,7 +301,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); - if (!ExitValue->isLoopInvariant(L)) + if (!SE->isLoopInvariant(ExitValue, L)) continue; Changed = true; @@ -338,7 +347,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { // If there are, change them into integer recurrences, permitting analysis by // the SCEV routines. // - BasicBlock *Header = L->getHeader(); + BasicBlock *Header = L->getHeader(); SmallVector<WeakVH, 8> PHIs; for (BasicBlock::iterator I = Header->begin(); @@ -346,7 +355,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { PHIs.push_back(PN); for (unsigned i = 0, e = PHIs.size(); i != e; ++i) - if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i])) + if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) HandleFloatingPointIV(L, PN); // If the loop previously had floating-point IV, ScalarEvolution @@ -395,7 +404,7 @@ void IndVarSimplify::EliminateIVComparisons() { // which are now dead. while (!DeadInsts.empty()) if (Instruction *Inst = - dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } @@ -462,7 +471,7 @@ void IndVarSimplify::EliminateIVRemainders() { // which are now dead. while (!DeadInsts.empty()) if (Instruction *Inst = - dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } @@ -607,9 +616,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // currently can only reduce affine polynomials. For now just disable // indvar subst on anything more complex than an affine addrec, unless // it can be expanded to a trivial value. -static bool isSafe(const SCEV *S, const Loop *L) { +static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { // Loop-invariant values are safe. - if (S->isLoopInvariant(L)) return true; + if (SE->isLoopInvariant(S, L)) return true; // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how // to transform them into efficient code. @@ -620,18 +629,18 @@ static bool isSafe(const SCEV *S, const Loop *L) { if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), E = Commutative->op_end(); I != E; ++I) - if (!isSafe(*I, L)) return false; + if (!isSafe(*I, L, SE)) return false; return true; } // A cast is safe if its operand is. 
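// The isSafe predicate in this hunk follows a standard structural recursion:
// leaves are decided directly, and a compound node inherits the property only
// if every operand has it. A minimal sketch of the pattern over a
// hypothetical expression type (not LLVM's SCEV, which also rejects
// non-affine addrecs outright):
#include <vector>

struct MiniExpr {
  bool LeafSafe;                 // meaningful only when Ops is empty
  std::vector<MiniExpr> Ops;
};

static bool isSafeExpr(const MiniExpr &E) {
  if (E.Ops.empty())
    return E.LeafSafe;           // leaves are decided directly
  for (const MiniExpr &Op : E.Ops)
    if (!isSafeExpr(Op))
      return false;              // compound nodes need all operands safe
  return true;
}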
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) - return isSafe(C->getOperand(), L); + return isSafe(C->getOperand(), L, SE); // A udiv is safe if its operands are. if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) - return isSafe(UD->getLHS(), L) && - isSafe(UD->getRHS(), L); + return isSafe(UD->getLHS(), L, SE) && + isSafe(UD->getRHS(), L, SE); // SCEVUnknown is always safe. if (isa<SCEVUnknown>(S)) @@ -662,7 +671,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // Evaluate the expression out of the loop, if possible. if (!L->contains(UI->getUser())) { const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); - if (ExitVal->isLoopInvariant(L)) + if (SE->isLoopInvariant(ExitVal, L)) AR = ExitVal; } @@ -672,7 +681,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // currently can only reduce affine polynomials. For now just disable // indvar subst on anything more complex than an affine addrec, unless // it can be expanded to a trivial value. - if (!isSafe(AR, L)) + if (!isSafe(AR, L, SE)) continue; // Determine the insertion point for this user. By default, insert @@ -725,7 +734,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // which are now dead. while (!DeadInsts.empty()) if (Instruction *Inst = - dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 104d5ae..90094a8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -40,20 +40,22 @@ STATISTIC(NumFolds, "Number of terminators folded"); STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi"); static cl::opt<unsigned> -Threshold("jump-threading-threshold", +Threshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden); -// Turn on use of LazyValueInfo. -static cl::opt<bool> -EnableLVI("enable-jump-threading-lvi", - cl::desc("Use LVI for jump threading"), - cl::init(true), - cl::ReallyHidden); - - - namespace { + // These are at global scope so static functions can use them too. + typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo; + typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy; + + // This is used to keep track of what kind of constant we're currently hoping + // to find. + enum ConstantPreference { + WantInteger, + WantBlockAddress + }; + /// This pass performs 'jump threading', which looks at blocks that have /// multiple predecessors and multiple successors. If one or more of the /// predecessors of the block can be proven to always jump to one of the @@ -79,61 +81,59 @@ namespace { SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; #endif DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; - + // RAII helper for updating the recursion stack. 
struct RecursionSetRemover { DenseSet<std::pair<Value*, BasicBlock*> > &TheSet; std::pair<Value*, BasicBlock*> ThePair; - + RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S, std::pair<Value*, BasicBlock*> P) : TheSet(S), ThePair(P) { } - + ~RecursionSetRemover() { TheSet.erase(ThePair); } }; public: static char ID; // Pass identification - JumpThreading() : FunctionPass(ID) {} + JumpThreading() : FunctionPass(ID) { + initializeJumpThreadingPass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &F); - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { - if (EnableLVI) { - AU.addRequired<LazyValueInfo>(); - AU.addPreserved<LazyValueInfo>(); - } + AU.addRequired<LazyValueInfo>(); + AU.addPreserved<LazyValueInfo>(); } - + void FindLoopHeaders(Function &F); bool ProcessBlock(BasicBlock *BB); bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs, BasicBlock *SuccBB); bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs); - - typedef SmallVectorImpl<std::pair<ConstantInt*, - BasicBlock*> > PredValueInfo; - + bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, - PredValueInfo &Result); - bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB); - - - bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); - bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); + PredValueInfo &Result, + ConstantPreference Preference); + bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB, + ConstantPreference Preference); bool ProcessBranchOnPHI(PHINode *PN); bool ProcessBranchOnXOR(BinaryOperator *BO); - + bool SimplifyPartiallyRedundantLoad(LoadInst *LI); }; } char JumpThreading::ID = 0; -INITIALIZE_PASS(JumpThreading, "jump-threading", - "Jump Threading", false, false); +INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", + "Jump Threading", false, false) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfo) +INITIALIZE_PASS_END(JumpThreading, "jump-threading", + "Jump Threading", false, false) // Public interface to the Jump Threading pass FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } @@ -143,21 +143,21 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TD = getAnalysisIfAvailable<TargetData>(); - LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0; - + LVI = &getAnalysis<LazyValueInfo>(); + FindLoopHeaders(F); - + bool Changed, EverChanged = false; do { Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { BasicBlock *BB = I; - // Thread all of the branches we can over this block. + // Thread all of the branches we can over this block. while (ProcessBlock(BB)) Changed = true; - + ++I; - + // If the block is trivially dead, zap it. This eliminates the successor // edges which simplifies the CFG. 
if (pred_begin(BB) == pred_end(BB) && @@ -165,48 +165,46 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); - if (LVI) LVI->eraseBlock(BB); + LVI->eraseBlock(BB); DeleteDeadBlock(BB); Changed = true; - } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - // Can't thread an unconditional jump, but if the block is "almost - // empty", we can replace uses of it with uses of the successor and make - // this dead. - if (BI->isUnconditional() && - BB != &BB->getParent()->getEntryBlock()) { - BasicBlock::iterator BBI = BB->getFirstNonPHI(); - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(BBI)) - ++BBI; + continue; + } + + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + + // Can't thread an unconditional jump, but if the block is "almost + // empty", we can replace uses of it with uses of the successor and make + // this dead. + if (BI && BI->isUnconditional() && + BB != &BB->getParent()->getEntryBlock() && // If the terminator is the only non-phi instruction, try to nuke it. - if (BBI->isTerminator()) { - // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the - // block, we have to make sure it isn't in the LoopHeaders set. We - // reinsert afterward if needed. - bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); - BasicBlock *Succ = BI->getSuccessor(0); - - // FIXME: It is always conservatively correct to drop the info - // for a block even if it doesn't get erased. This isn't totally - // awesome, but it allows us to use AssertingVH to prevent nasty - // dangling pointer issues within LazyValueInfo. - if (LVI) LVI->eraseBlock(BB); - if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { - Changed = true; - // If we deleted BB and BB was the header of a loop, then the - // successor is now the header of the loop. - BB = Succ; - } - - if (ErasedFromLoopHeaders) - LoopHeaders.insert(BB); - } + BB->getFirstNonPHIOrDbg()->isTerminator()) { + // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the + // block, we have to make sure it isn't in the LoopHeaders set. We + // reinsert afterward if needed. + bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); + BasicBlock *Succ = BI->getSuccessor(0); + + // FIXME: It is always conservatively correct to drop the info + // for a block even if it doesn't get erased. This isn't totally + // awesome, but it allows us to use AssertingVH to prevent nasty + // dangling pointer issues within LazyValueInfo. + LVI->eraseBlock(BB); + if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { + Changed = true; + // If we deleted BB and BB was the header of a loop, then the + // successor is now the header of the loop. + BB = Succ; } + + if (ErasedFromLoopHeaders) + LoopHeaders.insert(BB); } } EverChanged |= Changed; } while (Changed); - + LoopHeaders.clear(); return EverChanged; } @@ -216,25 +214,25 @@ bool JumpThreading::runOnFunction(Function &F) { static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { /// Ignore PHI nodes, these will be flattened when duplication happens. BasicBlock::const_iterator I = BB->getFirstNonPHI(); - + // FIXME: THREADING will delete values that are just used to compute the // branch, so they shouldn't count against the duplication cost. - - + + // Sum up the cost of each instruction until we get to the terminator. Don't // include the terminator because the copy won't include it. 
unsigned Size = 0; for (; !isa<TerminatorInst>(I); ++I) { // Debugger intrinsics don't incur code size. if (isa<DbgInfoIntrinsic>(I)) continue; - + // If this is a pointer->pointer bitcast, it is free. if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) continue; - + // All other instructions count for at least one unit. ++Size; - + // Calls are more expensive. If they are non-intrinsic calls, we model them // as having cost of 4. If they are a non-vector intrinsic, we model them // as having cost of 2 total, and if they are a vector intrinsic, we model @@ -246,12 +244,16 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { Size += 1; } } - + // Threading through a switch statement is particularly profitable. If this // block ends in a switch, decrease its cost to make it more likely to happen. if (isa<SwitchInst>(I)) Size = Size > 6 ? Size-6 : 0; - + + // The same holds for indirect branches, but slightly more so. + if (isa<IndirectBrInst>(I)) + Size = Size > 8 ? Size-8 : 0; + return Size; } @@ -273,57 +275,64 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { void JumpThreading::FindLoopHeaders(Function &F) { SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; FindFunctionBackedges(F, Edges); - + for (unsigned i = 0, e = Edges.size(); i != e; ++i) LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); } -// Helper method for ComputeValueKnownInPredecessors. If Value is a -// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing. -static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*, - BasicBlock*> > &Result, - Constant *Value, BasicBlock* BB){ - if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value)) - Result.push_back(std::make_pair(FoldedCInt, BB)); - else if (isa<UndefValue>(Value)) - Result.push_back(std::make_pair((ConstantInt*)0, BB)); +/// getKnownConstant - Helper method to determine if we can thread over a +/// terminator with the given value as its condition, and if so what value to +/// use for that. What kind of value this is depends on whether we want an +/// integer or a block address, but an undef is always accepted. +/// Returns null if Val is null or not an appropriate constant. +static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { + if (!Val) + return 0; + + // Undef is "known" enough. + if (UndefValue *U = dyn_cast<UndefValue>(Val)) + return U; + + if (Preference == WantBlockAddress) + return dyn_cast<BlockAddress>(Val->stripPointerCasts()); + + return dyn_cast<ConstantInt>(Val); } /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see -/// if we can infer that the value is a known ConstantInt in any of our -/// predecessors. If so, return the known list of value and pred BB in the -/// result vector. If a value is known to be undef, it is returned as null. +/// if we can infer that the value is a known ConstantInt/BlockAddress or undef +/// in any of our predecessors. If so, return the known list of value and pred +/// BB in the result vector. /// /// This returns true if there were any known values. /// bool JumpThreading:: -ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ +ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, + ConstantPreference Preference) { // This method walks up use-def chains recursively. Because of this, we could // get into an infinite loop going around loops in the use-def chain. 
To // prevent this, keep track of what (value, block) pairs we've already visited // and terminate the search if we loop back to them. if (!RecursionSet.insert(std::make_pair(V, BB)).second) return false; - + // An RAII helper to remove this pair from the recursion set once the recursion // stack pops back out again. RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB)); - - // If V is a constantint, then it is known in all predecessors. - if (isa<ConstantInt>(V) || isa<UndefValue>(V)) { - ConstantInt *CI = dyn_cast<ConstantInt>(V); - + + // If V is a constant, then it is known in all predecessors. + if (Constant *KC = getKnownConstant(V, Preference)) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Result.push_back(std::make_pair(CI, *PI)); - + Result.push_back(std::make_pair(KC, *PI)); + return true; } - + // If V is a non-instruction value, or an instruction in a different block, // then it can't be derived from a PHI. Instruction *I = dyn_cast<Instruction>(V); if (I == 0 || I->getParent() != BB) { - + // Okay, if this is a live-in value, see if it has a known value at the end // of any of our predecessors. // @@ -331,82 +340,78 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ /// TODO: Per PR2563, we could infer value range information about a /// predecessor based on its terminator. // - if (LVI) { - // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if - // "I" is a non-local compare-with-a-constant instruction. This would be - // able to handle value inequalities better, for example if the compare is - // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. - // Perhaps getConstantOnEdge should be smart enough to do this? - - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - // If the value is known by LazyValueInfo to be a constant in a - // predecessor, use that information to try to thread this block. - Constant *PredCst = LVI->getConstantOnEdge(V, P, BB); - if (PredCst == 0 || - (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst))) - continue; - - Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P)); - } - - return !Result.empty(); + // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if + // "I" is a non-local compare-with-a-constant instruction. This would be + // able to handle value inequalities better, for example if the compare is + // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. + // Perhaps getConstantOnEdge should be smart enough to do this? + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + Constant *PredCst = LVI->getConstantOnEdge(V, P, BB); + if (Constant *KC = getKnownConstant(PredCst, Preference)) + Result.push_back(std::make_pair(KC, P)); } - - return false; + + return !Result.empty(); } - + /// If I is a PHI node, then we know the incoming values for any constants. 
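To make the recursion guard above concrete, here is a minimal self-contained sketch with toy types (not the pass's own classes): the (value, block) pair is inserted on entry, a repeat visit aborts the walk, and an RAII object erases the pair when the recursion unwinds.

#include <set>
#include <utility>

using VisitKey = std::pair<const void *, const void *>;

// Erases its key from the visited set when the scope exits, mirroring
// RecursionSetRemover above.
struct ScopedVisit {
  std::set<VisitKey> &Visited;
  VisitKey Key;
  ScopedVisit(std::set<VisitKey> &S, VisitKey K) : Visited(S), Key(K) {}
  ~ScopedVisit() { Visited.erase(Key); }
};

bool walkUseDefChain(const void *V, const void *BB,
                     std::set<VisitKey> &Visited) {
  // A failed insert means this (value, block) pair is already on the
  // recursion stack: we looped back, so give up on this chain.
  if (!Visited.insert({V, BB}).second)
    return false;
  ScopedVisit Guard(Visited, {V, BB});
  // ... recurse into the operands of V here ...
  return true;
}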
if (PHINode *PN = dyn_cast<PHINode>(I)) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *InVal = PN->getIncomingValue(i); - if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) { - ConstantInt *CI = dyn_cast<ConstantInt>(InVal); - Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i))); - } else if (LVI) { + if (Constant *KC = getKnownConstant(InVal, Preference)) { + Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i))); + } else { Constant *CI = LVI->getConstantOnEdge(InVal, PN->getIncomingBlock(i), BB); - // LVI returns null is no value could be determined. - if (!CI) continue; - PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i)); + if (Constant *KC = getKnownConstant(CI, Preference)) + Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i))); } } - + return !Result.empty(); } - - SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals; + + PredValueInfoTy LHSVals, RHSVals; // Handle some boolean conditions. - if (I->getType()->getPrimitiveSizeInBits() == 1) { + if (I->getType()->getPrimitiveSizeInBits() == 1) { + assert(Preference == WantInteger && "One-bit non-integer type?"); // X | true -> true // X & false -> false if (I->getOpcode() == Instruction::Or || I->getOpcode() == Instruction::And) { - ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); - ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals); - + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, + WantInteger); + ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals, + WantInteger); + if (LHSVals.empty() && RHSVals.empty()) return false; - + ConstantInt *InterestingVal; if (I->getOpcode() == Instruction::Or) InterestingVal = ConstantInt::getTrue(I->getContext()); else InterestingVal = ConstantInt::getFalse(I->getContext()); - + SmallPtrSet<BasicBlock*, 4> LHSKnownBBs; - + // Scan for the sentinel. If we find an undef, force it to the // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { + if (LHSVals[i].first == InterestingVal || + isa<UndefValue>(LHSVals[i].first)) { Result.push_back(LHSVals[i]); Result.back().first = InterestingVal; LHSKnownBBs.insert(LHSVals[i].second); } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { + if (RHSVals[i].first == InterestingVal || + isa<UndefValue>(RHSVals[i].first)) { // If we already inferred a value for this block on the LHS, don't // re-add it. if (!LHSKnownBBs.count(RHSVals[i].second)) { @@ -414,48 +419,51 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ Result.back().first = InterestingVal; } } - + return !Result.empty(); } - + // Handle the NOT form of XOR. if (I->getOpcode() == Instruction::Xor && isa<ConstantInt>(I->getOperand(1)) && cast<ConstantInt>(I->getOperand(1))->isOne()) { - ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result); + ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result, + WantInteger); if (Result.empty()) return false; // Invert the known values. for (unsigned i = 0, e = Result.size(); i != e; ++i) - if (Result[i].first) - Result[i].first = - cast<ConstantInt>(ConstantExpr::getNot(Result[i].first)); - + Result[i].first = ConstantExpr::getNot(Result[i].first); + return true; } - + // Try to simplify some other binary operator values. 
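The sentinel scan above reduces to a small rule worth spelling out; in this sketch std::nullopt stands in for undef, and the "interesting" value is true for or and false for and, because x | undef may be folded to true and x & undef to false.

#include <optional>

// Returns true if a predecessor with this known operand value lets the
// whole or/and be treated as the interesting constant on that edge.
bool threadableAsInteresting(std::optional<bool> KnownOperand, bool IsOr) {
  const bool InterestingVal = IsOr; // true for OR, false for AND
  if (!KnownOperand)
    return true;                    // undef is forced to InterestingVal
  return *KnownOperand == InterestingVal;
}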
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + assert(Preference != WantBlockAddress + && "A binary operator creating a block address?"); if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { - SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; - ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals); - + PredValueInfoTy LHSVals; + ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals, + WantInteger); + // Try to use constant folding to simplify the binary operator. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { - Constant *V = LHSVals[i].first ? LHSVals[i].first : - cast<Constant>(UndefValue::get(BO->getType())); + Constant *V = LHSVals[i].first; Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); - - PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + + if (Constant *KC = getKnownConstant(Folded, WantInteger)) + Result.push_back(std::make_pair(KC, LHSVals[i].second)); } } - + return !Result.empty(); } - + // Handle compare with phi operand, where the PHI is defined in this block. if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { + assert(Preference == WantInteger && "Compares only produce integers"); PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0)); if (PN && PN->getParent() == BB) { // We can do this simplification if any comparisons fold to true or false. @@ -464,32 +472,31 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ BasicBlock *PredBB = PN->getIncomingBlock(i); Value *LHS = PN->getIncomingValue(i); Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); - + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD); if (Res == 0) { - if (!LVI || !isa<Constant>(RHS)) + if (!isa<Constant>(RHS)) continue; - - LazyValueInfo::Tristate + + LazyValueInfo::Tristate ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS, cast<Constant>(RHS), PredBB, BB); if (ResT == LazyValueInfo::Unknown) continue; Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT); } - - if (Constant *ConstRes = dyn_cast<Constant>(Res)) - PushConstantIntOrUndef(Result, ConstRes, PredBB); + + if (Constant *KC = getKnownConstant(Res, WantInteger)) + Result.push_back(std::make_pair(KC, PredBB)); } - + return !Result.empty(); } - - + + // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. - if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isIntegerTy()) { + if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) { if (!isa<Instruction>(Cmp->getOperand(0)) || cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); @@ -505,44 +512,74 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ continue; Constant *ResC = ConstantInt::get(Cmp->getType(), Res); - Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + Result.push_back(std::make_pair(ResC, P)); } return !Result.empty(); } - + // Try to find a constant value for the LHS of a comparison, // and evaluate it statically if we can. if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) { - SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; - ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); - + PredValueInfoTy LHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, + WantInteger); + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { - Constant *V = LHSVals[i].first ? 
LHSVals[i].first : - cast<Constant>(UndefValue::get(CmpConst->getType())); + Constant *V = LHSVals[i].first; Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), V, CmpConst); - PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + if (Constant *KC = getKnownConstant(Folded, WantInteger)) + Result.push_back(std::make_pair(KC, LHSVals[i].second)); } - + return !Result.empty(); } } } - - if (LVI) { - // If all else fails, see if LVI can figure out a constant value for us. - Constant *CI = LVI->getConstant(V, BB); - ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI); - if (CInt) { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Result.push_back(std::make_pair(CInt, *PI)); + + if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + // Handle select instructions where at least one operand is a known constant + // and we can figure out the condition value for any predecessor block. + Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference); + Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference); + PredValueInfoTy Conds; + if ((TrueVal || FalseVal) && + ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds, + WantInteger)) { + for (unsigned i = 0, e = Conds.size(); i != e; ++i) { + Constant *Cond = Conds[i].first; + + // Figure out what value to use for the condition. + bool KnownCond; + if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) { + // A known boolean. + KnownCond = CI->isOne(); + } else { + assert(isa<UndefValue>(Cond) && "Unexpected condition value"); + // Either operand will do, so be sure to pick the one that's a known + // constant. + // FIXME: Do this more cleverly if both values are known constants? + KnownCond = (TrueVal != 0); + } + + // See if the select has a known constant value for this predecessor. + if (Constant *Val = KnownCond ? TrueVal : FalseVal) + Result.push_back(std::make_pair(Val, Conds[i].second)); + } + + return !Result.empty(); } - - return !Result.empty(); } - - return false; + + // If all else fails, see if LVI can figure out a constant value for us. + Constant *CI = LVI->getConstant(V, BB); + if (Constant *KC = getKnownConstant(CI, Preference)) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(KC, *PI)); + } + + return !Result.empty(); } @@ -565,10 +602,20 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { if (NumPreds < MinNumPreds) MinSucc = i; } - + return MinSucc; } +static bool hasAddressTakenAndUsed(BasicBlock *BB) { + if (!BB->hasAddressTaken()) return false; + + // If the block has its address taken, it may be a tree of dead constants + // hanging off of it. These shouldn't keep the block alive. + BlockAddress *BA = BlockAddress::get(BB); + BA->removeDeadConstantUsers(); + return !BA->use_empty(); +} + /// ProcessBlock - If there are any predecessors whose control can be threaded /// through to a successor, transform them now. bool JumpThreading::ProcessBlock(BasicBlock *BB) { @@ -577,167 +624,122 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) return false; - + // If this block has a single predecessor, and if that pred has a single // successor, merge the blocks. This encourages recursive jump threading // because now the condition in this block can be threaded through // predecessors of our predecessor block. 
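A compact model of the select rule above, with toy values (a null pointer stands for "not a known constant"): a condition known on an edge picks the corresponding operand, and an undef condition is deliberately resolved toward whichever operand is known.

#include <optional>

const char *selectValueOnEdge(std::optional<bool> CondOnEdge,
                              const char *KnownTrueVal,
                              const char *KnownFalseVal) {
  bool UseTrueSide;
  if (CondOnEdge)
    UseTrueSide = *CondOnEdge;             // a real boolean constant
  else
    UseTrueSide = KnownTrueVal != nullptr; // undef: pick the known side
  // May still be null if the chosen operand is not a known constant.
  return UseTrueSide ? KnownTrueVal : KnownFalseVal;
}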
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) { if (SinglePred->getTerminator()->getNumSuccessors() == 1 && - SinglePred != BB) { + SinglePred != BB && !hasAddressTakenAndUsed(BB)) { // If SinglePred was a loop header, BB becomes one. if (LoopHeaders.erase(SinglePred)) LoopHeaders.insert(BB); - + // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - if (LVI) LVI->eraseBlock(SinglePred); + LVI->eraseBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); - + if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); return true; } } - // Look to see if the terminator is a branch of switch, if not we can't thread - // it. + // What kind of constant we're looking for. + ConstantPreference Preference = WantInteger; + + // Look to see if the terminator is a conditional branch, switch or indirect + // branch, if not we can't thread it. Value *Condition; - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + Instruction *Terminator = BB->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) { // Can't thread an unconditional jump. if (BI->isUnconditional()) return false; Condition = BI->getCondition(); - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) { Condition = SI->getCondition(); - else + } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) { + Condition = IB->getAddress()->stripPointerCasts(); + Preference = WantBlockAddress; + } else { return false; // Must be an invoke. - - // If the terminator of this block is branching on a constant, simplify the - // terminator to an unconditional branch. This can occur due to threading in - // other blocks. - if (isa<ConstantInt>(Condition)) { - DEBUG(dbgs() << " In block '" << BB->getName() - << "' folding terminator: " << *BB->getTerminator() << '\n'); - ++NumFolds; - ConstantFoldTerminator(BB); - return true; } - + // If the terminator is branching on an undef, we can pick any of the // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { unsigned BestSucc = GetBestDestForJumpOnUndef(BB); - + // Fold the branch/switch. TerminatorInst *BBTerm = BB->getTerminator(); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { if (i == BestSucc) continue; - RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD); + BBTerm->getSuccessor(i)->removePredecessor(BB, true); } - + DEBUG(dbgs() << " In block '" << BB->getName() << "' folding undef terminator: " << *BBTerm << '\n'); BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); return true; } - - Instruction *CondInst = dyn_cast<Instruction>(Condition); - // If the condition is an instruction defined in another block, see if a - // predecessor has the same condition: - // br COND, BBX, BBY - // BBX: - // br COND, BBZ, BBW - if (!LVI && - !Condition->hasOneUse() && // Multiple uses. - (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition. 
- pred_iterator PI = pred_begin(BB), E = pred_end(BB); - if (isa<BranchInst>(BB->getTerminator())) { - for (; PI != E; ++PI) { - BasicBlock *P = *PI; - if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator())) - if (PBI->isConditional() && PBI->getCondition() == Condition && - ProcessBranchOnDuplicateCond(P, BB)) - return true; - } - } else { - assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator"); - for (; PI != E; ++PI) { - BasicBlock *P = *PI; - if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator())) - if (PSI->getCondition() == Condition && - ProcessSwitchOnDuplicateCond(P, BB)) - return true; - } - } + // If the terminator of this block is branching on a constant, simplify the + // terminator to an unconditional branch. This can occur due to threading in + // other blocks. + if (getKnownConstant(Condition, Preference)) { + DEBUG(dbgs() << " In block '" << BB->getName() + << "' folding terminator: " << *BB->getTerminator() << '\n'); + ++NumFolds; + ConstantFoldTerminator(BB); + return true; } + Instruction *CondInst = dyn_cast<Instruction>(Condition); + // All the rest of our checks depend on the condition being an instruction. if (CondInst == 0) { // FIXME: Unify this with code below. - if (LVI && ProcessThreadableEdges(Condition, BB)) + if (ProcessThreadableEdges(Condition, BB, Preference)) return true; return false; - } - - + } + + if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { - if (!LVI && - (!isa<PHINode>(CondCmp->getOperand(0)) || - cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) { - // If we have a comparison, loop over the predecessors to see if there is - // a condition with a lexically identical value. - pred_iterator PI = pred_begin(BB), E = pred_end(BB); - for (; PI != E; ++PI) { - BasicBlock *P = *PI; - if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator())) - if (PBI->isConditional() && P != BB) { - if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { - if (CI->getOperand(0) == CondCmp->getOperand(0) && - CI->getOperand(1) == CondCmp->getOperand(1) && - CI->getPredicate() == CondCmp->getPredicate()) { - // TODO: Could handle things like (x != 4) --> (x == 17) - if (ProcessBranchOnDuplicateCond(P, BB)) - return true; - } - } - } - } - } - // For a comparison where the LHS is outside this block, it's possible // that we've branched on it before. Use LVI to see if we can simplify // the branch based on that. BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE && + if (CondBr && CondConst && CondBr->isConditional() && PI != PE && (!isa<Instruction>(CondCmp->getOperand(0)) || cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) { // For each predecessor edge, determine if the comparison is true or false // on that edge. If they're all true or all false, we can simplify the // branch. // FIXME: We could handle mixed true/false by duplicating code. - LazyValueInfo::Tristate Baseline = + LazyValueInfo::Tristate Baseline = LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0), CondConst, *PI, BB); if (Baseline != LazyValueInfo::Unknown) { // Check that all remaining incoming values match the first one. 
while (++PI != PE) { - LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge( - CondCmp->getPredicate(), - CondCmp->getOperand(0), - CondConst, *PI, BB); + LazyValueInfo::Tristate Ret = + LVI->getPredicateOnEdge(CondCmp->getPredicate(), + CondCmp->getOperand(0), CondConst, *PI, BB); if (Ret != Baseline) break; } - + // If we terminated early, then one of the values didn't match. if (PI == PE) { unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0; unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1; - RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD); + CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true); BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); CondBr->eraseFromParent(); return true; @@ -755,174 +757,37 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue)) if (isa<Constant>(CondCmp->getOperand(1))) SimplifyValue = CondCmp->getOperand(0); - + // TODO: There are other places where load PRE would be profitable, such as // more complex comparisons. if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue)) if (SimplifyPartiallyRedundantLoad(LI)) return true; - - + + // Handle a variety of cases where we are branching on something derived from // a PHI node in the current block. If we can prove that any predecessors // compute a predictable value based on a PHI node, thread those predecessors. // - if (ProcessThreadableEdges(CondInst, BB)) + if (ProcessThreadableEdges(CondInst, BB, Preference)) return true; - + // If this is an otherwise-unfoldable branch on a phi node in the current // block, see if we can simplify. if (PHINode *PN = dyn_cast<PHINode>(CondInst)) if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator())) return ProcessBranchOnPHI(PN); - - + + // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify. if (CondInst->getOpcode() == Instruction::Xor && CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator())) return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst)); - - - // TODO: If we have: "br (X > 0)" and we have a predecessor where we know - // "(X == 4)", thread through this block. - - return false; -} -/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that -/// block that jump on exactly the same condition. This means that we almost -/// always know the direction of the edge in the DESTBB: -/// PREDBB: -/// br COND, DESTBB, BBY -/// DESTBB: -/// br COND, BBZ, BBW -/// -/// If DESTBB has multiple predecessors, we can't just constant fold the branch -/// in DESTBB, we have to thread over it. -bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, - BasicBlock *BB) { - BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator()); - - // If both successors of PredBB go to DESTBB, we don't know anything. We can - // fold the branch to an unconditional one, which allows other recursive - // simplifications. - bool BranchDir; - if (PredBI->getSuccessor(1) != BB) - BranchDir = true; - else if (PredBI->getSuccessor(0) != BB) - BranchDir = false; - else { - DEBUG(dbgs() << " In block '" << PredBB->getName() - << "' folding terminator: " << *PredBB->getTerminator() << '\n'); - ++NumFolds; - ConstantFoldTerminator(PredBB); - return true; - } - - BranchInst *DestBI = cast<BranchInst>(BB->getTerminator()); - // If the dest block has one predecessor, just fix the branch condition to a - // constant and fold it. 
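The per-edge agreement test above boils down to the following shape (a sketch only; Tristate mirrors LazyValueInfo::Tristate): folding is possible only when the first edge's verdict is known and every other edge repeats it.

#include <optional>
#include <vector>

enum class Tristate { False, True, Unknown };

// Returns the direction to fold the branch, or nullopt if the edges
// disagree or any verdict is unknown.
std::optional<bool> foldDirection(const std::vector<Tristate> &EdgeVerdicts) {
  if (EdgeVerdicts.empty() || EdgeVerdicts.front() == Tristate::Unknown)
    return std::nullopt;
  for (Tristate V : EdgeVerdicts)
    if (V != EdgeVerdicts.front())
      return std::nullopt;
  return EdgeVerdicts.front() == Tristate::True;
}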
- if (BB->getSinglePredecessor()) { - DEBUG(dbgs() << " In block '" << BB->getName() - << "' folding condition to '" << BranchDir << "': " - << *BB->getTerminator() << '\n'); - ++NumFolds; - Value *OldCond = DestBI->getCondition(); - DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), - BranchDir)); - // Delete dead instructions before we fold the branch. Folding the branch - // can eliminate edges from the CFG which can end up deleting OldCond. - RecursivelyDeleteTriviallyDeadInstructions(OldCond); - ConstantFoldTerminator(BB); - return true; - } - - - // Next, figure out which successor we are threading to. - BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir); - - SmallVector<BasicBlock*, 2> Preds; - Preds.push_back(PredBB); - - // Ok, try to thread it! - return ThreadEdge(BB, Preds, SuccBB); -} - -/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that -/// block that switch on exactly the same condition. This means that we almost -/// always know the direction of the edge in the DESTBB: -/// PREDBB: -/// switch COND [... DESTBB, BBY ... ] -/// DESTBB: -/// switch COND [... BBZ, BBW ] -/// -/// Optimizing switches like this is very important, because simplifycfg builds -/// switches out of repeated 'if' conditions. -bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, - BasicBlock *DestBB) { - // Can't thread edge to self. - if (PredBB == DestBB) - return false; - - SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator()); - SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator()); - - // There are a variety of optimizations that we can potentially do on these - // blocks: we order them from most to least preferable. - - // If DESTBB *just* contains the switch, then we can forward edges from PREDBB - // directly to their destination. This does not introduce *any* code size - // growth. Skip debug info first. - BasicBlock::iterator BBI = DestBB->begin(); - while (isa<DbgInfoIntrinsic>(BBI)) - BBI++; - - // FIXME: Thread if it just contains a PHI. - if (isa<SwitchInst>(BBI)) { - bool MadeChange = false; - // Ignore the default edge for now. - for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) { - ConstantInt *DestVal = DestSI->getCaseValue(i); - BasicBlock *DestSucc = DestSI->getSuccessor(i); - - // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if - // PredSI has an explicit case for it. If so, forward. If it is covered - // by the default case, we can't update PredSI. - unsigned PredCase = PredSI->findCaseValue(DestVal); - if (PredCase == 0) continue; - - // If PredSI doesn't go to DestBB on this value, then it won't reach the - // case on this condition. - if (PredSI->getSuccessor(PredCase) != DestBB && - DestSI->getSuccessor(i) != DestBB) - continue; - - // Do not forward this if it already goes to this destination, this would - // be an infinite loop. - if (PredSI->getSuccessor(PredCase) == DestSucc) - continue; - - // Otherwise, we're safe to make the change. Make sure that the edge from - // DestSI to DestSucc is not critical and has no PHI nodes. - DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI); - DEBUG(dbgs() << "THROUGH: " << *DestSI); + // TODO: If we have: "br (X > 0)" and we have a predecessor where we know + // "(X == 4)", thread through this block. - // If the destination has PHI nodes, just split the edge for updating - // simplicity. 
- if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){ - SplitCriticalEdge(DestSI, i, this); - DestSucc = DestSI->getSuccessor(i); - } - FoldSingleEntryPHINodes(DestSucc); - PredSI->setSuccessor(PredCase, DestSucc); - MadeChange = true; - } - - if (MadeChange) - return true; - } - return false; } @@ -934,13 +799,13 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Don't hack volatile loads. if (LI->isVolatile()) return false; - + // If the load is defined in a block with exactly one predecessor, it can't be // partially redundant. BasicBlock *LoadBB = LI->getParent(); if (LoadBB->getSinglePredecessor()) return false; - + Value *LoadedPtr = LI->getOperand(0); // If the loaded operand is defined in the LoadBB, it can't be available. @@ -948,17 +813,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr)) if (PtrOp->getParent() == LoadBB) return false; - + // Scan a few instructions up from the load, to see if it is obviously live at // the entry to its block. BasicBlock::iterator BBIt = LI; - if (Value *AvailableVal = + if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) { // If the value of the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n"; - + // If the returned value is the load itself, replace with an undef. This can // only happen in dead loops. if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); @@ -972,13 +837,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // might clobber its value. if (BBIt != LoadBB->begin()) return false; - - + + SmallPtrSet<BasicBlock*, 8> PredsScanned; typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; AvailablePredsTy AvailablePreds; BasicBlock *OneUnavailablePred = 0; - + // If we got here, the loaded value is transparent through to the start of the // block. Check to see if it is available in any of the predecessor blocks. for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); @@ -996,23 +861,23 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { OneUnavailablePred = PredBB; continue; } - + // If so, this load is partially redundant. Remember this info so that we // can create a PHI node. AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable)); } - + // If the loaded value isn't available in any predecessor, it isn't partially // redundant. if (AvailablePreds.empty()) return false; - + // Okay, the loaded value is available in at least one (and maybe all!) // predecessors. If the value is unavailable in more than one unique // predecessor, we want to insert a merge block for those common predecessors. // This ensures that we only have to insert one reload, thus not increasing // code size. BasicBlock *UnavailablePred = 0; - + // If there is exactly one predecessor where the value is unavailable, the // already computed 'OneUnavailablePred' block is it. If it ends in an // unconditional branch, we know that it isn't a critical edge. @@ -1035,17 +900,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If the predecessor is an indirect goto, we can't split the edge. 
if (isa<IndirectBrInst>(P->getTerminator())) return false; - + if (!AvailablePredSet.count(P)) PredsToSplit.push_back(P); } - + // Split them out to their own block. UnavailablePred = SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), "thread-pre-split", this); } - + // If the value isn't available in all predecessors, then there will be // exactly one where it isn't available. Insert a load on that edge and add // it to the AvailablePreds list. @@ -1057,35 +922,35 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { UnavailablePred->getTerminator()); AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } - + // Now we know that each predecessor of this block has a value in // AvailablePreds, sort them for efficient access as we're walking the preds. array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); - + // Create a PHI node at the start of the block for the PRE'd load value. PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); PN->takeName(LI); - + // Insert new entries into the PHI for each predecessor. A single block may // have multiple entries here. for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { BasicBlock *P = *PI; - AvailablePredsTy::iterator I = + AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), std::make_pair(P, (Value*)0)); - + assert(I != AvailablePreds.end() && I->first == P && "Didn't find entry for predecessor!"); - + PN->addIncoming(I->second, I->first); } - + //cerr << "PRE: " << *LI << *PN << "\n"; - + LI->replaceAllUsesWith(PN); LI->eraseFromParent(); - + return true; } @@ -1097,7 +962,7 @@ FindMostPopularDest(BasicBlock *BB, const SmallVectorImpl<std::pair<BasicBlock*, BasicBlock*> > &PredToDestList) { assert(!PredToDestList.empty()); - + // Determine popularity. If there are multiple possible destinations, we // explicitly choose to ignore 'undef' destinations. We prefer to thread // blocks with known and real destinations to threading undef. We'll handle @@ -1106,13 +971,13 @@ FindMostPopularDest(BasicBlock *BB, for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) if (PredToDestList[i].second) DestPopularity[PredToDestList[i].second]++; - + // Find the most popular dest. DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); BasicBlock *MostPopularDest = DPI->first; unsigned Popularity = DPI->second; SmallVector<BasicBlock*, 4> SamePopularity; - + for (++DPI; DPI != DestPopularity.end(); ++DPI) { // If the popularity of this entry isn't higher than the popularity we've // seen so far, ignore it. @@ -1126,10 +991,10 @@ FindMostPopularDest(BasicBlock *BB, SamePopularity.clear(); MostPopularDest = DPI->first; Popularity = DPI->second; - } + } } - - // Okay, now we know the most popular destination. If there is more than + + // Okay, now we know the most popular destination. If there is more than one // destination, we need to determine one. This is arbitrary, but we need // to make a deterministic decision. Pick the first one that appears in the // successor list. 
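For orientation, the transformation SimplifyPartiallyRedundantLoad performs can be modeled in plain C++ on a hypothetical two-predecessor shape: the value is available on the path that just stored it but not on the other, so a reload goes on the unavailable edge and the join block keeps only the merge.

// Before: the join point reloads *P even though one path just stored it.
int joinBefore(bool StorePath, int *P) {
  if (StorePath)
    *P = 42;
  return *P;              // partially redundant load
}

// After: the load moved to the formerly-unavailable edge; the local
// variable V plays the role of the PHI that merges the two values.
int joinAfter(bool StorePath, int *P) {
  int V;
  if (StorePath) {
    *P = 42;
    V = 42;               // value already available on this path
  } else {
    V = *P;               // reload inserted on this edge only
  }
  return V;               // no load left at the join point
}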
@@ -1138,105 +1003,105 @@ FindMostPopularDest(BasicBlock *BB, TerminatorInst *TI = BB->getTerminator(); for (unsigned i = 0; ; ++i) { assert(i != TI->getNumSuccessors() && "Didn't find any successor!"); - + if (std::find(SamePopularity.begin(), SamePopularity.end(), TI->getSuccessor(i)) == SamePopularity.end()) continue; - + MostPopularDest = TI->getSuccessor(i); break; } } - + // Okay, we have finally picked the most popular destination. return MostPopularDest; } -bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { +bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, + ConstantPreference Preference) { // If threading this would thread across a loop header, don't even try to // thread the edge. if (LoopHeaders.count(BB)) return false; - - SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues; - if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) + + PredValueInfoTy PredValues; + if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference)) return false; - + assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); DEBUG(dbgs() << "IN BB: " << *BB; for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { - dbgs() << " BB '" << BB->getName() << "': FOUND condition = "; - if (PredValues[i].first) - dbgs() << *PredValues[i].first; - else - dbgs() << "UNDEF"; - dbgs() << " for pred '" << PredValues[i].second->getName() - << "'.\n"; + dbgs() << " BB '" << BB->getName() << "': FOUND condition = " + << *PredValues[i].first + << " for pred '" << PredValues[i].second->getName() << "'.\n"; }); - + // Decide what we want to thread through. Convert our list of known values to // a list of known destinations for each pred. This also discards duplicate // predecessors and keeps track of the undefined inputs (which are represented // as a null dest in the PredToDestList). SmallPtrSet<BasicBlock*, 16> SeenPreds; SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList; - + BasicBlock *OnlyDest = 0; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; - + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { BasicBlock *Pred = PredValues[i].second; if (!SeenPreds.insert(Pred)) continue; // Duplicate predecessor entry. - + // If the predecessor ends with an indirect goto, we can't change its // destination. if (isa<IndirectBrInst>(Pred->getTerminator())) continue; - - ConstantInt *Val = PredValues[i].first; - + + Constant *Val = PredValues[i].first; + BasicBlock *DestBB; - if (Val == 0) // Undef. + if (isa<UndefValue>(Val)) DestBB = 0; else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) - DestBB = BI->getSuccessor(Val->isZero()); + DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); + else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) + DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val))); else { - SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); - DestBB = SI->getSuccessor(SI->findCaseValue(Val)); + assert(isa<IndirectBrInst>(BB->getTerminator()) + && "Unexpected terminator"); + DestBB = cast<BlockAddress>(Val)->getBasicBlock(); } // If we have exactly one destination, remember it for efficiency below. - if (i == 0) + if (PredToDestList.empty()) OnlyDest = DestBB; else if (OnlyDest != DestBB) OnlyDest = MultipleDestSentinel; - + PredToDestList.push_back(std::make_pair(Pred, DestBB)); } - + // If all edges were unthreadable, we fail. 
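A standalone sketch of the popularity logic above, using strings for block names (toy types, not the pass's): undef destinations do not vote, and ties are broken deterministically by taking the first tied candidate in the terminator's successor order.

#include <map>
#include <string>
#include <vector>

std::string mostPopularDest(const std::vector<std::string> &PredDests,
                            const std::vector<std::string> &SuccOrder) {
  std::map<std::string, unsigned> Popularity;
  for (const std::string &D : PredDests)
    if (!D.empty())       // an empty name models an undef destination
      ++Popularity[D];
  // Scanning in successor order makes the tie-break deterministic.
  std::string Best;
  unsigned BestCount = 0;
  for (const std::string &S : SuccOrder) {
    auto It = Popularity.find(S);
    if (It != Popularity.end() && It->second > BestCount) {
      BestCount = It->second;
      Best = S;
    }
  }
  return Best;
}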
if (PredToDestList.empty()) return false; - + // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one // threadable destination (the common case) we can avoid this. BasicBlock *MostPopularDest = OnlyDest; - + if (MostPopularDest == MultipleDestSentinel) MostPopularDest = FindMostPopularDest(BB, PredToDestList); - + // Now that we know what the most popular destination is, factor all // predecessors that will jump to it into a single predecessor. SmallVector<BasicBlock*, 16> PredsToFactor; for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) if (PredToDestList[i].second == MostPopularDest) { BasicBlock *Pred = PredToDestList[i].first; - + // This predecessor may be a switch or something else that has multiple // edges to the block. Factor each of these edges by listing them // according to # occurrences in PredsToFactor. @@ -1251,7 +1116,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { if (MostPopularDest == 0) MostPopularDest = BB->getTerminator()-> getSuccessor(GetBestDestForJumpOnUndef(BB)); - + // Ok, try to thread it! return ThreadEdge(BB, PredsToFactor, MostPopularDest); } @@ -1259,15 +1124,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { /// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on /// a PHI node in the current block. See if there are any simplifications we /// can do based on inputs to the phi node. -/// +/// bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) { BasicBlock *BB = PN->getParent(); - + // TODO: We could make use of this to do it once for blocks with common PHI // values. SmallVector<BasicBlock*, 1> PredBBs; PredBBs.resize(1); - + // If any of the predecessor blocks end in an unconditional branch, we can // *duplicate* the conditional branch into that block in order to further // encourage jump threading and to eliminate cases where we have branch on a @@ -1289,21 +1154,21 @@ bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) { /// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on /// a xor instruction in the current block. See if there are any /// simplifications we can do based on inputs to the xor. -/// +/// bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { BasicBlock *BB = BO->getParent(); - + // If either the LHS or RHS of the xor is a constant, don't do this // optimization. if (isa<ConstantInt>(BO->getOperand(0)) || isa<ConstantInt>(BO->getOperand(1))) return false; - + // If the first instruction in BB isn't a phi, we won't be able to infer // anything special about any particular predecessor. if (!isa<PHINode>(BB->front())) return false; - + // If we have a xor as the branch input to this block, and we know that the // LHS or RHS of the xor in any predecessor is true/false, then we can clone // the condition into the predecessor and fix that value to true, saving some @@ -1322,15 +1187,17 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // %Y = icmp ne i32 %A, %B // br i1 %Z, ... 
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues; + PredValueInfoTy XorOpValues; bool isLHS = true; - if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) { + if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues, + WantInteger)) { assert(XorOpValues.empty()); - if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues)) + if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues, + WantInteger)) return false; isLHS = false; } - + assert(!XorOpValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); @@ -1338,29 +1205,33 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // predecessors can be of the set true, false, or undef. unsigned NumTrue = 0, NumFalse = 0; for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { - if (!XorOpValues[i].first) continue; // Ignore undefs for the count. - if (XorOpValues[i].first->isZero()) + if (isa<UndefValue>(XorOpValues[i].first)) + // Ignore undefs for the count. + continue; + if (cast<ConstantInt>(XorOpValues[i].first)->isZero()) ++NumFalse; else ++NumTrue; } - + // Determine which value to split on, true, false, or undef if neither. ConstantInt *SplitVal = 0; if (NumTrue > NumFalse) SplitVal = ConstantInt::getTrue(BB->getContext()); else if (NumTrue != 0 || NumFalse != 0) SplitVal = ConstantInt::getFalse(BB->getContext()); - + // Collect all of the blocks that this can be folded into so that we can // factor this once and clone it once. SmallVector<BasicBlock*, 8> BlocksToFoldInto; for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { - if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue; + if (XorOpValues[i].first != SplitVal && + !isa<UndefValue>(XorOpValues[i].first)) + continue; BlocksToFoldInto.push_back(XorOpValues[i].second); } - + // If we inferred a value for all of the predecessors, then duplication won't // help us. However, we can just replace the LHS or RHS with the constant. if (BlocksToFoldInto.size() == @@ -1377,10 +1248,10 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // If all preds provide 1, set the computed value to 1. BO->setOperand(!isLHS, SplitVal); } - + return true; } - + // Try to duplicate BB into PredBB. return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); } @@ -1398,14 +1269,14 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, // Ok, we have a PHI node. Figure out what the incoming value was for the // DestBlock. Value *IV = PN->getIncomingValueForBlock(OldPred); - + // Remap the value if necessary. if (Instruction *Inst = dyn_cast<Instruction>(IV)) { DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst); if (I != ValueMap.end()) IV = I->second; } - + PN->addIncoming(IV, NewPred); } } @@ -1413,8 +1284,8 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, /// ThreadEdge - We have decided that it is safe and profitable to factor the /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB /// across BB. Transform the IR to reflect this change. -bool JumpThreading::ThreadEdge(BasicBlock *BB, - const SmallVectorImpl<BasicBlock*> &PredBBs, +bool JumpThreading::ThreadEdge(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &PredBBs, BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. 
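The vote counting in ProcessBranchOnXOR comes down to this (a sketch; std::nullopt models undef): tally the known true and false inputs, split on the majority value, and report no preference when only undefs were seen.

#include <optional>
#include <vector>

std::optional<bool> chooseXorSplitVal(
    const std::vector<std::optional<bool>> &KnownInputs) {
  unsigned NumTrue = 0, NumFalse = 0;
  for (const std::optional<bool> &V : KnownInputs) {
    if (!V)
      continue;           // undefs don't vote
    ++(*V ? NumTrue : NumFalse);
  }
  if (NumTrue > NumFalse)
    return true;
  if (NumTrue != 0 || NumFalse != 0)
    return false;
  return std::nullopt;    // only undefs: no preferred split value
}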
if (SuccBB == BB) { @@ -1422,7 +1293,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, << "' - would thread to self!\n"); return false; } - + // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) { @@ -1438,7 +1309,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, << "' - Cost is too high: " << JumpThreadCost << "\n"); return false; } - + // And finally, do it! Start by factoring the predecessors if needed. BasicBlock *PredBB; if (PredBBs.size() == 1) @@ -1449,30 +1320,29 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), ".thr_comm", this); } - + // And finally, do it! DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '" << SuccBB->getName() << "' with cost: " << JumpThreadCost << ", across block:\n " << *BB << "\n"); - - if (LVI) - LVI->threadEdge(PredBB, BB, SuccBB); - + + LVI->threadEdge(PredBB, BB, SuccBB); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. DenseMap<Instruction*, Value*> ValueMapping; - - BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), - BB->getName()+".thread", + + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), + BB->getName()+".thread", BB->getParent(), BB); NewBB->moveAfter(PredBB); - + BasicBlock::iterator BI = BB->begin(); for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); - + // Clone the non-phi instructions of BB into NewBB, keeping track of the // mapping and using it to remap operands in the cloned instructions. for (; !isa<TerminatorInst>(BI); ++BI) { @@ -1480,7 +1350,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, New->setName(BI->getName()); NewBB->getInstList().push_back(New); ValueMapping[BI] = New; - + // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { @@ -1489,15 +1359,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, New->setOperand(i, I->second); } } - + // We didn't copy the terminator from BB over to NewBB, because there is now // an unconditional jump to SuccBB. Insert the unconditional jump. BranchInst::Create(SuccBB, NewBB); - + // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the // PHI nodes for NewBB now. AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); - + // If there were values defined in BB that are used outside the block, then we // now have to update all uses of the value to use either the original value, // the cloned value, or some PHI derived value. This can require arbitrary @@ -1515,14 +1385,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, continue; } else if (User->getParent() == BB) continue; - + UsesToRename.push_back(&UI.getUse()); } - + // If there are no uses outside the block, we're done with this instruction. if (UsesToRename.empty()) continue; - + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); // We found a use of I outside of BB. 
Rename all uses of I that are outside @@ -1531,28 +1401,28 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); - + while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); DEBUG(dbgs() << "\n"); } - - + + // Ok, NewBB is good to go. Update the terminator of PredBB to jump to // NewBB instead of BB. This eliminates predecessors from BB, which requires // us to simplify any PHI nodes in BB. TerminatorInst *PredTerm = PredBB->getTerminator(); for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) if (PredTerm->getSuccessor(i) == BB) { - RemovePredecessorAndSimplify(BB, PredBB, TD); + BB->removePredecessor(PredBB, true); PredTerm->setSuccessor(i, NewBB); } - + // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. SimplifyInstructionsInBlock(NewBB, TD); - + // Threaded an edge! ++NumThreads; return true; @@ -1576,14 +1446,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, << "' - it might create an irreducible loop!\n"); return false; } - + unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); if (DuplicationCost > Threshold) { DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() << "' - Cost is too high: " << DuplicationCost << "\n"); return false; } - + // And finally, do it! Start by factoring the predecessors if needed. BasicBlock *PredBB; if (PredBBs.size() == 1) @@ -1594,35 +1464,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), ".thr_comm", this); } - + // Okay, we decided to do this! Clone all the instructions in BB onto the end // of PredBB. DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '" << PredBB->getName() << "' to eliminate branch on phi. Cost: " << DuplicationCost << " block is:" << *BB << "\n"); - + // Unless PredBB ends with an unconditional branch, split the edge so that we // can just clone the bits from BB into the end of the new PredBB. BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator()); - + if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) { PredBB = SplitEdge(PredBB, BB, this); OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); } - + // We are going to have to map operands from the original BB block into the // PredBB block. Evaluate PHI nodes in BB. DenseMap<Instruction*, Value*> ValueMapping; - + BasicBlock::iterator BI = BB->begin(); for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); - + // Clone the non-phi instructions of BB into PredBB, keeping track of the // mapping and using it to remap operands in the cloned instructions. for (; BI != BB->end(); ++BI) { Instruction *New = BI->clone(); - + // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { @@ -1644,7 +1514,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, ValueMapping[BI] = New; } } - + // Check to see if the targets of the branch had PHI nodes. If so, we need to // add entries to the PHI nodes for the branch from PredBB now. 
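The clone loops in ThreadEdge and DuplicateCondBranchOnPHIIntoPred share one idea, sketched here with a toy instruction type: record each clone in a value map as you go, and rewrite any operand that refers to an earlier instruction of the same block so it refers to that instruction's clone.

#include <unordered_map>
#include <vector>

struct Inst {
  std::vector<Inst *> Ops; // operands, possibly defined in this block
};

std::vector<Inst *> cloneAndRemap(const std::vector<Inst *> &Block) {
  std::unordered_map<Inst *, Inst *> ValueMap;
  std::vector<Inst *> Clones;
  for (Inst *I : Block) {
    Inst *New = new Inst(*I); // operands still point at the originals
    for (Inst *&Op : New->Ops) {
      auto It = ValueMap.find(Op);
      if (It != ValueMap.end())
        Op = It->second;      // patch up intra-block references
    }
    ValueMap[I] = New;
    Clones.push_back(New);
  }
  return Clones;
}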
BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator()); @@ -1652,7 +1522,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, ValueMapping); AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB, ValueMapping); - + // If there were values defined in BB that are used outside the block, then we // now have to update all uses of the value to use either the original value, // the cloned value, or some PHI derived value. This can require arbitrary @@ -1670,35 +1540,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, continue; } else if (User->getParent() == BB) continue; - + UsesToRename.push_back(&UI.getUse()); } - + // If there are no uses outside the block, we're done with this instruction. if (UsesToRename.empty()) continue; - + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); - + // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); - + while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); DEBUG(dbgs() << "\n"); } - + // PredBB no longer jumps to BB, remove entries in the PHI node for the edge // that we nuked. - RemovePredecessorAndSimplify(BB, PredBB, TD); - + BB->removePredecessor(PredBB, true); + // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); - + ++NumDupes; return true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index 2ef8544..0786793 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -36,13 +36,13 @@ #include "llvm/DerivedTypes.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Support/CFG.h" @@ -66,7 +66,9 @@ DisablePromotion("disable-licm-promotion", cl::Hidden, namespace { struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid - LICM() : LoopPass(ID) {} + LICM() : LoopPass(ID) { + initializeLICMPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -80,7 +82,7 @@ namespace { AU.addRequiredID(LoopSimplifyID); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); + AU.addPreserved("scalar-evolution"); AU.addPreservedID(LoopSimplifyID); } @@ -129,42 +131,7 @@ namespace { /// bool inSubLoop(BasicBlock *BB) { assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); - for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I) - if ((*I)->contains(BB)) - return true; // A subloop actually contains this block! - return false; - } - - /// isExitBlockDominatedByBlockInLoop - This method checks to see if the - /// specified exit block of the loop is dominated by the specified block - /// that is in the body of the loop. 
We use these constraints to - /// dramatically limit the amount of the dominator tree that needs to be - /// searched. - bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock, - BasicBlock *BlockInLoop) const { - // If the block in the loop is the loop header, it must be dominated! - BasicBlock *LoopHeader = CurLoop->getHeader(); - if (BlockInLoop == LoopHeader) - return true; - - DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop); - DomTreeNode *IDom = DT->getNode(ExitBlock); - - // Because the exit block is not in the loop, we know we have to get _at - // least_ its immediate dominator. - IDom = IDom->getIDom(); - - while (IDom && IDom != BlockInLoopNode) { - // If we have got to the header of the loop, then the instructions block - // did not dominate the exit node, so we can't hoist it. - if (IDom->getBlock() == LoopHeader) - return false; - - // Get next Immediate Dominator. - IDom = IDom->getIDom(); - }; - - return true; + return LI->getLoopFor(BB) != CurLoop; } /// sink - When an instruction is found to only be used outside of the loop, @@ -187,13 +154,13 @@ namespace { /// pointerInvalidatedByLoop - Return true if the body of this loop may /// store into the memory location pointed to by V. /// - bool pointerInvalidatedByLoop(Value *V, unsigned Size) { + bool pointerInvalidatedByLoop(Value *V, uint64_t Size, + const MDNode *TBAAInfo) { // Check to see if any of the basic blocks in CurLoop invalidate *V. - return CurAST->getAliasSetForPointer(V, Size).isMod(); + return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod(); } bool canSinkOrHoistInst(Instruction &I); - bool isLoopInvariantInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); void PromoteAliasSet(AliasSet &AS); @@ -201,7 +168,12 @@ namespace { } char LICM::ID = 0; -INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false); +INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LICM(); } @@ -369,7 +341,7 @@ void LICM::HoistRegion(DomTreeNode *N) { // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. // - if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && + if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) && isSafeToExecuteUnconditionally(I)) hoist(I); } @@ -394,16 +366,17 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { return true; // Don't hoist loads which have may-aliased stores in loop. - unsigned Size = 0; + uint64_t Size = 0; if (LI->getType()->isSized()) Size = AA->getTypeStoreSize(LI->getType()); - return !pointerInvalidatedByLoop(LI->getOperand(0), Size); + return !pointerInvalidatedByLoop(LI->getOperand(0), Size, + LI->getMetadata(LLVMContext::MD_tbaa)); } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { // Handle obvious cases efficiently. AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == AliasAnalysis::DoesNotAccessMemory) return true; - else if (Behavior == AliasAnalysis::OnlyReadsMemory) { + if (AliasAnalysis::onlyReadsMemory(Behavior)) { // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. 
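The rewritten inSubLoop earlier in this hunk leans on a LoopInfo invariant worth making explicit: LoopInfo records the innermost loop containing each block, so a block of the current loop lies in a subloop exactly when that innermost loop is not the current loop. A toy model of the lookup (hypothetical types, not LLVM's):

#include <unordered_map>

struct Loop {};

// The map plays the role of LoopInfo::getLoopFor.
bool inSubLoop(const void *BB, const Loop *CurLoop,
               const std::unordered_map<const void *, const Loop *>
                   &InnermostLoop) {
  auto It = InnermostLoop.find(BB);
  const Loop *L = (It == InnermostLoop.end()) ? nullptr : It->second;
  return L != CurLoop; // O(1), replacing the old scan over all subloops
}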
bool FoundMod = false; @@ -452,20 +425,6 @@ bool LICM::isNotUsedInLoop(Instruction &I) { } -/// isLoopInvariantInst - Return true if all operands of this instruction are -/// loop invariant. We also filter out non-hoistable instructions here just for -/// efficiency. -/// -bool LICM::isLoopInvariantInst(Instruction &I) { - // The instruction is loop invariant if all of its operands are loop-invariant - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) - if (!CurLoop->isLoopInvariant(I.getOperand(i))) - return false; - - // If we got this far, the instruction is loop invariant! - return true; -} - /// sink - When an instruction is found to only be used outside of the loop, /// this function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its @@ -486,7 +445,7 @@ void LICM::sink(Instruction &I) { // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { - if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) { + if (!DT->dominates(I.getParent(), ExitBlocks[0])) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. @@ -537,7 +496,7 @@ void LICM::sink(Instruction &I) { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; - if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) + if (!DT->dominates(InstOrigBB, ExitBlock)) continue; // Insert the code after the last PHI node. @@ -628,15 +587,61 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); - // For each exit block, get the DT node and walk up the DT until the - // instruction's basic block is found or we exit the loop. + // Verify that the block dominates each of the exit blocks of the loop. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent())) + if (!DT->dominates(Inst.getParent(), ExitBlocks[i])) return false; return true; } +namespace { + class LoopPromoter : public LoadAndStorePromoter { + Value *SomePtr; // Designated pointer to store to. + SmallPtrSet<Value*, 4> &PointerMustAliases; + SmallVectorImpl<BasicBlock*> &LoopExitBlocks; + AliasSetTracker &AST; + public: + LoopPromoter(Value *SP, + const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, + SmallPtrSet<Value*, 4> &PMA, + SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast) + : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), + LoopExitBlocks(LEB), AST(ast) {} + + virtual bool isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &) const { + Value *Ptr; + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + Ptr = LI->getOperand(0); + else + Ptr = cast<StoreInst>(I)->getPointerOperand(); + return PointerMustAliases.count(Ptr); + } + + virtual void doExtraRewritesBeforeFinalDeletion() const { + // Insert stores in the loop exit blocks. Each exit block gets a + // store of the live-out values that feed them. Since we've already told + // the SSA updater about the defs in the loop and the preheader + // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = LoopExitBlocks[i]; + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Instruction *InsertPos = ExitBlock->getFirstNonPHI(); + new StoreInst(LiveInValue, SomePtr, InsertPos); + } + } + + virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const { + // Update alias analysis. + AST.copyValue(LI, V); + } + virtual void instructionDeleted(Instruction *I) const { + AST.deleteValue(I); + } + }; +} // end anon namespace + /// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers @@ -697,8 +702,11 @@ void LICM::PromoteAliasSet(AliasSet &AS) { if (isa<LoadInst>(Use)) assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); else if (isa<StoreInst>(Use)) { + // Stores *of* the pointer are not interesting, only stores *to* the + // pointer. + if (Use->getOperand(1) != ASIV) + continue; assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); - if (Use->getOperand(0) == ASIV) return; } else return; // Not a load or store. @@ -718,179 +726,43 @@ void LICM::PromoteAliasSet(AliasSet &AS) { Changed = true; ++NumPromoted; + SmallVector<BasicBlock*, 8> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); + LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, + *CurAST); - // It wants to know some value of the same type as what we'll be inserting. - Value *SomeValue; - if (isa<LoadInst>(LoopUses[0])) - SomeValue = LoopUses[0]; - else - SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0); - SSA.Initialize(SomeValue->getType(), SomeValue->getName()); - - // First step: bucket up uses of the pointers by the block they occur in. - // This is important because we have to handle multiple defs/uses in a block - // ourselves: SSAUpdater is purely for cross-block references. - // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element. - DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; - for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { - Instruction *User = LoopUses[i]; - UsesByBlock[User->getParent()].push_back(User); - } - - // Okay, now we can iterate over all the blocks in the loop with uses, - // processing them. Keep track of which loads are loading a live-in value. - SmallVector<LoadInst*, 32> LiveInLoads; - DenseMap<Value*, Value*> ReplacedLoads; - - for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) { - Instruction *User = LoopUses[LoopUse]; - std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()]; - - // If this block has already been processed, ignore this repeat use. - if (BlockUses.empty()) continue; - - // Okay, this is the first use in the block. If this block just has a - // single user in it, we can rewrite it trivially. - if (BlockUses.size() == 1) { - // If it is a store, it is a trivial def of the value in the block. - if (isa<StoreInst>(User)) { - SSA.AddAvailableValue(User->getParent(), - cast<StoreInst>(User)->getOperand(0)); - } else { - // Otherwise it is a load, queue it to rewrite as a live-in load. - LiveInLoads.push_back(cast<LoadInst>(User)); - } - BlockUses.clear(); - continue; - } - - // Otherwise, check to see if this block is all loads. 
If so, we can queue - // them all as live in loads. - bool HasStore = false; - for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { - if (isa<StoreInst>(BlockUses[i])) { - HasStore = true; - break; - } - } - - if (!HasStore) { - for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) - LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); - BlockUses.clear(); - continue; - } - - // Otherwise, we have mixed loads and stores (or just a bunch of stores). - // Since SSAUpdater is purely for cross-block values, we need to determine - // the order of these instructions in the block. If the first use in the - // block is a load, then it uses the live in value. The last store defines - // the live out value. We handle this by doing a linear scan of the block. - BasicBlock *BB = User->getParent(); - Value *StoredValue = 0; - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (LoadInst *L = dyn_cast<LoadInst>(II)) { - // If this is a load from an unrelated pointer, ignore it. - if (!PointerMustAliases.count(L->getOperand(0))) continue; - - // If we haven't seen a store yet, this is a live in use, otherwise - // use the stored value. - if (StoredValue) { - L->replaceAllUsesWith(StoredValue); - ReplacedLoads[L] = StoredValue; - } else { - LiveInLoads.push_back(L); - } - continue; - } - - if (StoreInst *S = dyn_cast<StoreInst>(II)) { - // If this is a store to an unrelated pointer, ignore it. - if (!PointerMustAliases.count(S->getOperand(1))) continue; - - // Remember that this is the active value in the block. - StoredValue = S->getOperand(0); - } - } - - // The last stored value that happened is the live-out for the block. - assert(StoredValue && "Already checked that there is a store in block"); - SSA.AddAvailableValue(BB, StoredValue); - BlockUses.clear(); - } - - // Now that all the intra-loop values are classified, set up the preheader. - // It gets a load of the pointer we're promoting, and it is the live-out value - // from the preheader. - LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", - Preheader->getTerminator()); + // Set up the preheader to have a definition of the value. It is the live-out + // value from the preheader that uses in the loop will use. + LoadInst *PreheaderLoad = + new LoadInst(SomePtr, SomePtr->getName()+".promoted", + Preheader->getTerminator()); SSA.AddAvailableValue(Preheader, PreheaderLoad); - // Now that the preheader is good to go, set up the exit blocks. Each exit - // block gets a store of the live-out values that feed them. Since we've - // already told the SSA updater about the defs in the loop and the preheader - // definition, it is all set and we can start using it. - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *ExitBlock = ExitBlocks[i]; - Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstNonPHI(); - new StoreInst(LiveInValue, SomePtr, InsertPos); + // Copy any value stored to or loaded from a must-alias of the pointer. + if (PreheaderLoad->getType()->isPointerTy()) { + Value *SomeValue; + if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0])) + SomeValue = LI; + else + SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand(); + + CurAST->copyValue(SomeValue, PreheaderLoad); } - // Okay, now we rewrite all loads that use live-in values in the loop, - // inserting PHI nodes as necessary. 
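At the source level, the promotion that the new LoopPromoter/SSAUpdater code replaces this hand-written bookkeeping with looks roughly as follows; a minimal sketch with invented names, assuming P is known not to alias a:

    // Before PromoteAliasSet: every iteration loads and stores through P.
    void accum(int *P, const int *a, int n) {
      for (int i = 0; i < n; ++i)
        *P += a[i];
    }

    // After promotion: one load in the preheader (the ".promoted" load),
    // a register inside the loop, and a store of the live-out value in
    // the loop exit block.
    void accum_promoted(int *P, const int *a, int n) {
      int t = *P;
      for (int i = 0; i < n; ++i)
        t += a[i];
      *P = t;
    }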
- for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { - LoadInst *ALoad = LiveInLoads[i]; - Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); - ALoad->replaceAllUsesWith(NewVal); - CurAST->copyValue(ALoad, NewVal); - ReplacedLoads[ALoad] = NewVal; - } + // Rewrite all the loads in the loop and remember all the definitions from + // stores in the loop. + Promoter.run(LoopUses); // If the preheader load is itself a pointer, we need to tell alias analysis // about the new pointer we created in the preheader block and about any PHI // nodes that just got inserted. if (PreheaderLoad->getType()->isPointerTy()) { - // Copy any value stored to or loaded from a must-alias of the pointer. - CurAST->copyValue(SomeValue, PreheaderLoad); - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - CurAST->copyValue(SomeValue, NewPHIs[i]); - } - - // Now that everything is rewritten, delete the old instructions from the body - // of the loop. They should all be dead now. - for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { - Instruction *User = LoopUses[i]; - - // If this is a load that still has uses, then the load must have been added - // as a live value in the SSAUpdate data structure for a block (e.g. because - // the loaded value was stored later). In this case, we need to recursively - // propagate the updates until we get to the real value. - if (!User->use_empty()) { - Value *NewVal = ReplacedLoads[User]; - assert(NewVal && "not a replaced load?"); - - // Propagate down to the ultimate replacee. The intermediate loads - // could theoretically already have been deleted, so we don't want to - // dereference the Value*'s. - DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); - while (RLI != ReplacedLoads.end()) { - NewVal = RLI->second; - RLI = ReplacedLoads.find(NewVal); - } - - User->replaceAllUsesWith(NewVal); - CurAST->copyValue(User, NewVal); - } - - CurAST->deleteValue(User); - User->eraseFromParent(); + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(PreheaderLoad, NewPHIs[i]); } // Phew, we're done! diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index 543dfc1..6d1d344 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -17,6 +17,7 @@ #define DEBUG_TYPE "loop-delete" #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallVector.h" @@ -28,7 +29,9 @@ namespace { class LoopDeletion : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopDeletion() : LoopPass(ID) {} + LoopDeletion() : LoopPass(ID) { + initializeLoopDeletionPass(*PassRegistry::getPassRegistry()); + } // Possibly eliminate loop L if it is dead.
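For context on what "dead" means for this pass: a loop whose computed values are never used outside it and which writes no memory can be removed outright once its trip count is known to be finite. An illustrative sketch, not taken from the patch:

    // The loop computes s, but s is never read afterwards, so loop
    // deletion can remove the entire loop.
    int f(int n) {
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += i;
      return 42;
    }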
bool runOnLoop(Loop* L, LPPassManager& LPM); @@ -49,14 +52,20 @@ namespace { AU.addPreserved<LoopInfo>(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); - AU.addPreserved<DominanceFrontier>(); } }; } char LoopDeletion::ID = 0; -INITIALIZE_PASS(LoopDeletion, "loop-deletion", - "Delete dead loops", false, false); +INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion", + "Delete dead loops", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_END(LoopDeletion, "loop-deletion", + "Delete dead loops", false, false) Pass* llvm::createLoopDeletionPass() { return new LoopDeletion(); @@ -183,22 +192,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. DominatorTree& DT = getAnalysis<DominatorTree>(); - DominanceFrontier* DF = getAnalysisIfAvailable<DominanceFrontier>(); - SmallPtrSet<DomTreeNode*, 8> ChildNodes; + SmallVector<DomTreeNode*, 8> ChildNodes; for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) { // Move all of the block's children to be children of the preheader, which // allows us to remove the domtree entry for the block. - ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end()); - for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(), + ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end()); + for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(), DE = ChildNodes.end(); DI != DE; ++DI) { DT.changeImmediateDominator(*DI, DT[preheader]); - if (DF) DF->changeImmediateDominator((*DI)->getBlock(), preheader, &DT); } ChildNodes.clear(); DT.eraseNode(*LI); - if (DF) DF->removeBlock(*LI); // Remove the block from the reference counting scheme, so that we can // delete it freely later. diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp new file mode 100644 index 0000000..d7fa149 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -0,0 +1,594 @@ +//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements an idiom recognizer that transforms simple loops into a +// non-loop form. In cases that this kicks in, it can be a significant +// performance win. +// +//===----------------------------------------------------------------------===// +// +// TODO List: +// +// Future loop memory idioms to recognize: +// memcmp, memmove, strlen, etc. +// Future floating point idioms to recognize in -ffast-math mode: +// fpowi +// Future integer operation idioms to recognize: +// ctpop, ctlz, cttz +// +// Beware that isel's default lowering for ctpop is highly inefficient for +// i64 and larger types when i64 is legal and the value has few bits set. It +// would be good to enhance isel to emit a loop for ctpop in this case. +// +// We should enhance the memset/memcpy recognition to handle multiple stores in +// the loop. 
This would handle things like: +// void foo(_Complex float *P) +// for (i) { __real__(*P) = 0; __imag__(*P) = 0; } +// +// This could recognize common matrix multiplies and dot product idioms and +// replace them with calls to BLAS (if linked in??). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-idiom" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); +STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); + +namespace { + class LoopIdiomRecognize : public LoopPass { + Loop *CurLoop; + const TargetData *TD; + DominatorTree *DT; + ScalarEvolution *SE; + TargetLibraryInfo *TLI; + public: + static char ID; + explicit LoopIdiomRecognize() : LoopPass(ID) { + initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM); + bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl<BasicBlock*> &ExitBlocks); + + bool processLoopStore(StoreInst *SI, const SCEV *BECount); + bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); + + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, + unsigned StoreAlignment, + Value *SplatValue, Instruction *TheStore, + const SCEVAddRecExpr *Ev, + const SCEV *BECount); + bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, + const SCEVAddRecExpr *StoreEv, + const SCEVAddRecExpr *LoadEv, + const SCEV *BECount); + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. + /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<ScalarEvolution>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DominatorTree>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); + } + }; +} + +char LoopIdiomRecognize::ID = 0; +INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", + false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", + false, false) + +Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } + +/// DeleteDeadInstruction - Delete this instruction. Before we do, go through +/// and zero out all the operands of this instruction. If any of them become +/// dead, delete them and the computation tree that feeds them. 
+/// +static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { + SmallVector<Instruction*, 32> NowDeadInsts; + + NowDeadInsts.push_back(I); + + // Before we touch this instruction, remove it from SE! + do { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + + // This instruction is dead, zap it, in stages. Start by removing it from + // SCEV. + SE.forgetValue(DeadInst); + + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { + Value *Op = DeadInst->getOperand(op); + DeadInst->setOperand(op, 0); + + // If this operand just became dead, add it to the NowDeadInsts list. + if (!Op->use_empty()) continue; + + if (Instruction *OpI = dyn_cast<Instruction>(Op)) + if (isInstructionTriviallyDead(OpI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + + } while (!NowDeadInsts.empty()); +} + +bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { + CurLoop = L; + + // The trip count of the loop must be analyzable. + SE = &getAnalysis<ScalarEvolution>(); + if (!SE->hasLoopInvariantBackedgeTakenCount(L)) + return false; + const SCEV *BECount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BECount)) return false; + + // If this loop executes exactly one time, then it should be peeled, not + // optimized by this pass. + if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) + if (BECst->getValue()->getValue() == 0) + return false; + + // We require target data for now. + TD = getAnalysisIfAvailable<TargetData>(); + if (TD == 0) return false; + + DT = &getAnalysis<DominatorTree>(); + LoopInfo &LI = getAnalysis<LoopInfo>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + + SmallVector<BasicBlock*, 8> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + + DEBUG(dbgs() << "loop-idiom Scanning: F[" + << L->getHeader()->getParent()->getName() + << "] Loop %" << L->getHeader()->getName() << "\n"); + + bool MadeChange = false; + // Scan all the blocks in the loop that are not in subloops. + for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; + ++BI) { + // Ignore blocks in subloops. + if (LI.getLoopFor(*BI) != CurLoop) + continue; + + MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); + } + return MadeChange; +} + +/// runOnLoopBlock - Process the specified block, which lives in a counted loop +/// with the specified backedge count. This block is known to be in the current +/// loop and not in any subloops. +bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl<BasicBlock*> &ExitBlocks) { + // We can only promote stores in this block if they are unconditionally + // executed in the loop. For a block to be unconditionally executed, it has + // to dominate all the exit blocks of the loop. Verify this now. + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!DT->dominates(BB, ExitBlocks[i])) + return false; + + bool MadeChange = false; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + // Look for store instructions, which may be optimized to memset/memcpy. + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + WeakVH InstPtr(I); + if (!processLoopStore(SI, BECount)) continue; + MadeChange = true; + + // If processing the store invalidated our iterator, start over from the + // top of the block. + if (InstPtr == 0) + I = BB->begin(); + continue; + } + + // Look for memset instructions, which may be optimized to a larger memset. 
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { + WeakVH InstPtr(I); + if (!processLoopMemSet(MSI, BECount)) continue; + MadeChange = true; + + // If processing the memset invalidated our iterator, start over from the + // top of the block. + if (InstPtr == 0) + I = BB->begin(); + continue; + } + } + + return MadeChange; +} + + +/// processLoopStore - See if this store can be promoted to a memset or memcpy. +bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { + if (SI->isVolatile()) return false; + + Value *StoredVal = SI->getValueOperand(); + Value *StorePtr = SI->getPointerOperand(); + + // Reject stores that are so large that they overflow an unsigned. + uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); + if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided store. If we have something else, it's a + // random store we can't handle. + const SCEVAddRecExpr *StoreEv = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); + if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) + return false; + + // Check to see if the stride matches the size of the store. If so, then we + // know that every byte is touched in the loop. + unsigned StoreSize = (unsigned)SizeInBits >> 3; + const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); + + // TODO: Could also handle negative stride here someday, that will require the + // validity check in mayLoopAccessLocation to be updated though. + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) + return false; + + // See if we can optimize just this store in isolation. + if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), + StoredVal, SI, StoreEv, BECount)) + return true; + + // If the stored value is a strided load in the same loop with the same + // stride, this may be transformable into a memcpy. This kicks in for stuff + // like + // for (i) A[i] = B[i]; + if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + const SCEVAddRecExpr *LoadEv = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0))); + if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && + StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile()) + if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) + return true; + } + //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n"; + + return false; +} + +/// processLoopMemSet - See if this memset can be promoted to a large memset. +bool LoopIdiomRecognize:: +processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { + // We can only handle non-volatile memsets with a constant size. + if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false; + + // If we're not allowed to hack on memset, we fail. + if (!TLI->has(LibFunc::memset)) + return false; + + Value *Pointer = MSI->getDest(); + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided store. If we have something else, it's a + // random store we can't handle. + const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer)); + if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine()) + return false; + + // Reject memsets that are so large that they overflow an unsigned.
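The canonical loop these checks accept, and the rewrite that processLoopStridedStore then performs, can be sketched at the source level as follows (illustrative C++ with invented names, not from the patch):

    #include <cstring>
    #include <cstddef>

    // Before: the pointer SCEV for &p[i] is the affine AddRec {p,+,1},
    // and the stride (1) matches the 1-byte store size, so every byte in
    // the range is written.
    void zero_bytes(unsigned char *p, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i)
        p[i] = 0;
    }

    // After the transformation: a single call emitted in the preheader.
    void zero_bytes_idiom(unsigned char *p, std::size_t n) {
      if (n) std::memset(p, 0, n);
    }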
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); + if ((SizeInBytes >> 32) != 0) + return false; + + // Check to see if the stride matches the size of the memset. If so, then we + // know that every byte is touched in the loop. + const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); + + // TODO: Could also handle negative stride here someday, that will require the + // validity check in mayLoopAccessLocation to be updated though. + if (Stride == 0 || MSI->getLength() != Stride->getValue()) + return false; + + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, + MSI->getAlignment(), MSI->getValue(), + MSI, Ev, BECount); +} + + +/// mayLoopAccessLocation - Return true if the specified loop might access the +/// specified pointer location, which is a loop-strided access. The 'Access' +/// argument specifies what the verboten forms of access are (read or write). +static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, + Loop *L, const SCEV *BECount, + unsigned StoreSize, AliasAnalysis &AA, + Instruction *IgnoredStore) { + // Get the location that may be stored across the loop. Since the access is + // strided positively through memory, we say that the modified location starts + // at the pointer and has infinite size. + uint64_t AccessSize = AliasAnalysis::UnknownSize; + + // If the loop iterates a fixed number of times, we can refine the access size + // to be exactly the size of the memset, which is (BECount+1)*StoreSize. + if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) + AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; + + // TODO: For this to be really effective, we have to dive into the pointer + // operand in the store. Store to &A[i] of 100 will always return may alias + // with store of &A[100], we need StoreLoc to be "A" with size of 100, + // which will then no-alias a store to &A[100]. + AliasAnalysis::Location StoreLoc(Ptr, AccessSize); + + for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; + ++BI) + for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) + if (&*I != IgnoredStore && + (AA.getModRefInfo(I, StoreLoc) & Access)) + return true; + + return false; +} + +/// getMemSetPatternValue - If a strided store of the specified value is safe to +/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should +/// be passed in. Otherwise, return null. +/// +/// Note that we don't ever attempt to use memset_pattern8 or 4, because these +/// just replicate their input array and then pass on to memset_pattern16. +static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { + // If the value isn't a constant, we can't promote it to being in a constant + // array. We could theoretically do a store to an alloca or something, but + // that doesn't seem worthwhile. + Constant *C = dyn_cast<Constant>(V); + if (C == 0) return 0; + + // Only handle simple values that are a power of two bytes in size. + uint64_t Size = TD.getTypeSizeInBits(V->getType()); + if (Size == 0 || (Size & 7) || (Size & (Size-1))) + return 0; + + // Don't care enough about darwin/ppc to implement this. + if (TD.isBigEndian()) + return 0; + + // Convert to size in bytes. + Size /= 8; + + // TODO: If CI is larger than 16 bytes, we can try slicing it in half to see + // if the top and bottom are the same (e.g. for vectors and large integers). + if (Size > 16) return 0; + + // If the constant is exactly 16 bytes, just use it.
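As an illustration of the replication rule getMemSetPatternValue implements (the IR below is shown loosely; memset_pattern16 is the Darwin libc routine this code targets): a store of the i32 constant 0x01020304 is not byte-splattable, so it cannot become a plain memset, but the 4-byte constant can be replicated 16/4 = 4 times to fill the 16-byte pattern:

    // for (i = 0; i < n; ++i) p[i] = 0x01020304;
    // becomes, roughly:
    //   @.memset_pattern = internal unnamed_addr constant
    //       [4 x i32] [i32 16909060, i32 16909060, i32 16909060, i32 16909060],
    //       align 16
    //   call void @memset_pattern16(i8* %p, i8* @.memset_pattern, i64 %NumBytes)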
+ if (Size == 16) return C; + + // Otherwise, we'll use an array of the constants. + unsigned ArraySize = 16/Size; + ArrayType *AT = ArrayType::get(V->getType(), ArraySize); + return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C)); +} + + +/// processLoopStridedStore - We see a strided store of some value. If we can +/// transform this into a memset or memset_pattern in the loop preheader, do so. +bool LoopIdiomRecognize:: +processLoopStridedStore(Value *DestPtr, unsigned StoreSize, + unsigned StoreAlignment, Value *StoredVal, + Instruction *TheStore, const SCEVAddRecExpr *Ev, + const SCEV *BECount) { + + // If the stored value is a byte-wise value (like i32 -1), then it may be + // turned into a memset of i8 -1, assuming that all the consecutive bytes + // are stored. A store of i32 0x01020304 can never be turned into a memset, + // but it can be turned into memset_pattern if the target supports it. + Value *SplatValue = isBytewiseValue(StoredVal); + Constant *PatternValue = 0; + + // If we're allowed to form a memset, and the stored value would be acceptable + // for memset, use it. + if (SplatValue && TLI->has(LibFunc::memset) && + // Verify that the stored value is loop invariant. If not, we can't + // promote the memset. + CurLoop->isLoopInvariant(SplatValue)) { + // Keep and use SplatValue. + PatternValue = 0; + } else if (TLI->has(LibFunc::memset_pattern16) && + (PatternValue = getMemSetPatternValue(StoredVal, *TD))) { + // It looks like we can use PatternValue! + SplatValue = 0; + } else { + // Otherwise, this isn't an idiom we can transform. We can't do anything + // with a 3-byte store, for example. + return false; + } + + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn + // this into a memset in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write to the aliased location. Check for an alias. + if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef, + CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) + return false; + + // Okay, everything looks good, insert the memset. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + + IRBuilder<> Builder(Preheader->getTerminator()); + + // The trip count of the loop and the base pointer of the addrec SCEV are + // guaranteed to be loop invariant, which means that they should dominate the + // header. Just insert code for it in the preheader. + SCEVExpander Expander(*SE); + + unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); + Value *BasePtr = + Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), + Preheader->getTerminator()); + + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to + // pointer size if it isn't already.
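A worked instance of that byte count, with invented numbers: for a loop "for (i = 0; i != 100; ++i) p[i] = 0;" storing i32 values, the backedge-taken count BECount is 99 (the backedge executes 99 times), StoreSize is 4, so NumBytes = (BECount + 1) * StoreSize = 100 * 4 = 400, and the emitted call is memset(p, 0, 400).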
+ const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); + BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); + + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), + true /*no unsigned overflow*/); + if (StoreSize != 1) + NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), + true /*no unsigned overflow*/); + + Value *NumBytes = + Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); + + Value *NewCall; + if (SplatValue) + NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment); + else { + Module *M = TheStore->getParent()->getParent()->getParent(); + Value *MSP = M->getOrInsertFunction("memset_pattern16", + Builder.getVoidTy(), + Builder.getInt8PtrTy(), + Builder.getInt8PtrTy(), IntPtr, + (void*)0); + + // Otherwise we should form a memset_pattern16. PatternValue is known to be + // a constant array of 16 bytes. Plop the value into a mergable global. + GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, + GlobalValue::InternalLinkage, + PatternValue, ".memset_pattern"); + GV->setUnnamedAddr(true); // Ok to merge these. + GV->setAlignment(16); + Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); + NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); + } + + DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" + << " from store to: " << *Ev << " at: " << *TheStore << "\n"); + (void)NewCall; + + // Okay, the memset has been formed. Zap the original store and anything that + // feeds into it. + DeleteDeadInstruction(TheStore, *SE); + ++NumMemSet; + return true; +} + +/// processLoopStoreOfLoopLoad - We see a strided store whose value is a +/// same-strided load. +bool LoopIdiomRecognize:: +processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, + const SCEVAddRecExpr *StoreEv, + const SCEVAddRecExpr *LoadEv, + const SCEV *BECount) { + // If we're not allowed to form memcpy, we fail. + if (!TLI->has(LibFunc::memcpy)) + return false; + + LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); + + // Okay, we have a strided store "p[i]" of a loaded value. We can turn + // this into a memcpy in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write to the stored location (including the load feeding the stores). + // Check for an alias. + if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef, + CurLoop, BECount, StoreSize, + getAnalysis<AliasAnalysis>(), SI)) + return false; + + // For a memcpy, we have to make sure that the input array is not being + // mutated by the loop. + if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod, + CurLoop, BECount, StoreSize, + getAnalysis<AliasAnalysis>(), SI)) + return false; + + // Okay, everything looks good, insert the memcpy. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + + IRBuilder<> Builder(Preheader->getTerminator()); + + // The trip count of the loop and the base pointer of the addrec SCEV are + // guaranteed to be loop invariant, which means that they should dominate the + // header. Just insert code for it in the preheader.
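The memcpy case mirrors the memset one; an illustrative source-level sketch with invented names (not from the patch), valid when the two ranges cannot overlap so that both mayLoopAccessLocation queries come back clean:

    #include <cstring>
    #include <cstddef>

    // Before: a same-strided load feeds the store, as in the
    // "for (i) A[i] = B[i];" comment earlier in this file.
    void copy_loop(int *a, const int *b, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i)
        a[i] = b[i];
    }

    // After processLoopStoreOfLoopLoad: one memcpy in the preheader.
    void copy_idiom(int *a, const int *b, std::size_t n) {
      if (n) std::memcpy(a, b, n * sizeof(int));
    }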
+ SCEVExpander Expander(*SE); + + Value *LoadBasePtr = + Expander.expandCodeFor(LoadEv->getStart(), + Builder.getInt8PtrTy(LI->getPointerAddressSpace()), + Preheader->getTerminator()); + Value *StoreBasePtr = + Expander.expandCodeFor(StoreEv->getStart(), + Builder.getInt8PtrTy(SI->getPointerAddressSpace()), + Preheader->getTerminator()); + + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to + // pointer size if it isn't already. + const Type *IntPtr = TD->getIntPtrType(SI->getContext()); + BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); + + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), + true /*no unsigned overflow*/); + if (StoreSize != 1) + NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), + true /*no unsigned overflow*/); + + Value *NumBytes = + Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); + + Value *NewCall = + Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, + std::min(SI->getAlignment(), LI->getAlignment())); + + DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" + << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" + << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); + (void)NewCall; + + // Okay, the memcpy has been formed. Zap the original store and anything that + // feeds into it. + DeleteDeadInstruction(SI, *SE); + ++NumMemCpy; + return true; +} diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp deleted file mode 100644 index a433674..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ /dev/null @@ -1,1270 +0,0 @@ -//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Loop Index Splitting Pass. This pass handles three -// kinds of loops. -// -// [1] A loop may be eliminated if the body is executed exactly once. -// For example, -// -// for (i = 0; i < N; ++i) { -// if (i == X) { -// body; -// } -// } -// -// is transformed to -// -// i = X; -// body; -// -// [2] A loop's iteration space may be shrunk if the loop body is executed -// for a proper sub-range of the loop's iteration space. For example, -// -// for (i = 0; i < N; ++i) { -// if (i > A && i < B) { -// ... -// } -// } -// -// is transformed to iterators from A to B, if A > 0 and B < N. -// -// [3] A loop may be split if the loop body is dominated by a branch.
-// For example, -// -// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; } -// -// is transformed into -// -// AEV = BSV = SV -// for (i = LB; i < min(UB, AEV); ++i) -// A; -// for (i = max(LB, BSV); i < UB; ++i) -// B; -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "loop-index-split" -#include "llvm/Transforms/Scalar.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Statistic.h" - -using namespace llvm; - -STATISTIC(NumIndexSplit, "Number of loop index split"); -STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split"); -STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted"); - -namespace { - - class LoopIndexSplit : public LoopPass { - public: - static char ID; // Pass ID, replacement for typeid - LoopIndexSplit() : LoopPass(ID) {} - - // Index split Loop L. Return true if loop is split. - bool runOnLoop(Loop *L, LPPassManager &LPM); - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<ScalarEvolution>(); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); - } - - private: - /// processOneIterationLoop -- Eliminate loop if loop body is executed - /// only once. For example, - /// for (i = 0; i < N; ++i) { - /// if ( i == X) { - /// ... - /// } - /// } - /// - bool processOneIterationLoop(); - - // -- Routines used by updateLoopIterationSpace(); - - /// updateLoopIterationSpace -- Update loop's iteration space if loop - /// body is executed for certain IV range only. For example, - /// - /// for (i = 0; i < N; ++i) { - /// if ( i > A && i < B) { - /// ... - /// } - /// } - /// is transformed to iterators from A to B, if A > 0 and B < N. - /// - bool updateLoopIterationSpace(); - - /// restrictLoopBound - Op dominates loop body. Op compares an IV based value - /// with a loop invariant value. Update loop's lower and upper bound based on - /// the loop invariant value. - bool restrictLoopBound(ICmpInst &Op); - - // --- Routines used by splitLoop(). --- / - - bool splitLoop(); - - /// removeBlocks - Remove basic block DeadBB and all blocks dominated by - /// DeadBB. This routine is used to remove split condition's dead branch, - /// dominated by DeadBB. LiveBB dominates split condition's other branch. - void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB); - - /// moveExitCondition - Move exit condition EC into split condition block. - void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB, - BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC, - PHINode *IV, Instruction *IVAdd, Loop *LP, - unsigned); - - /// updatePHINodes - CFG has been changed. - /// Before - /// - ExitBB's single predecessor was Latch - /// - Latch's second successor was Header - /// Now - /// - ExitBB's single predecessor was Header - /// - Latch's one and only successor was Header - /// - /// Update ExitBB's PHINodes to reflect this change.
- void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, - BasicBlock *Header, - PHINode *IV, Instruction *IVIncrement, Loop *LP); - - // --- Utility routines --- / - - /// cleanBlock - A block is considered clean if all non-terminal - /// instructions are either PHINodes or IV based values. - bool cleanBlock(BasicBlock *BB); - - /// IVisLT - If Op is comparing an IV based value with a loop invariant and - /// the IV based value is less than the loop invariant, then return the loop - /// invariant. Otherwise return NULL. - Value * IVisLT(ICmpInst &Op); - - /// IVisLE - If Op is comparing an IV based value with a loop invariant and - /// the IV based value is less than or equal to the loop invariant, then - /// return the loop invariant. Otherwise return NULL. - Value * IVisLE(ICmpInst &Op); - - /// IVisGT - If Op is comparing an IV based value with a loop invariant and - /// the IV based value is greater than the loop invariant, then return the loop - /// invariant. Otherwise return NULL. - Value * IVisGT(ICmpInst &Op); - - /// IVisGE - If Op is comparing an IV based value with a loop invariant and - /// the IV based value is greater than or equal to the loop invariant, then - /// return the loop invariant. Otherwise return NULL. - Value * IVisGE(ICmpInst &Op); - - private: - - // Current Loop information. - Loop *L; - LPPassManager *LPM; - LoopInfo *LI; - DominatorTree *DT; - DominanceFrontier *DF; - - PHINode *IndVar; - ICmpInst *ExitCondition; - ICmpInst *SplitCondition; - Value *IVStartValue; - Value *IVExitValue; - Instruction *IVIncrement; - SmallPtrSet<Value *, 4> IVBasedValues; - }; -} - -char LoopIndexSplit::ID = 0; -INITIALIZE_PASS(LoopIndexSplit, "loop-index-split", - "Index Split Loops", false, false); - -Pass *llvm::createLoopIndexSplitPass() { - return new LoopIndexSplit(); -} - -// Index split Loop L. Return true if loop is split. -bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) { - L = IncomingLoop; - LPM = &LPM_Ref; - - // If LoopSimplify form is not available, stay out of trouble. - if (!L->isLoopSimplifyForm()) - return false; - - // FIXME - Nested loops make dominator info updates tricky. - if (!L->getSubLoops().empty()) - return false; - - DT = &getAnalysis<DominatorTree>(); - LI = &getAnalysis<LoopInfo>(); - DF = &getAnalysis<DominanceFrontier>(); - - // Initialize loop data. - IndVar = L->getCanonicalInductionVariable(); - if (!IndVar) return false; - - bool P1InLoop = L->contains(IndVar->getIncomingBlock(1)); - IVStartValue = IndVar->getIncomingValue(!P1InLoop); - IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop)); - if (!IVIncrement) return false; - - IVBasedValues.clear(); - IVBasedValues.insert(IndVar); - IVBasedValues.insert(IVIncrement); - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) - for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); - BI != BE; ++BI) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI)) - if (BO != IVIncrement - && (BO->getOpcode() == Instruction::Add - || BO->getOpcode() == Instruction::Sub)) - if (IVBasedValues.count(BO->getOperand(0)) - && L->isLoopInvariant(BO->getOperand(1))) - IVBasedValues.insert(BO); - } - - // Reject loop if loop exit condition is not suitable.
- BasicBlock *ExitingBlock = L->getExitingBlock(); - if (!ExitingBlock) - return false; - BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!EBR) return false; - ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition()); - if (!ExitCondition) return false; - if (ExitingBlock != L->getLoopLatch()) return false; - IVExitValue = ExitCondition->getOperand(1); - if (!L->isLoopInvariant(IVExitValue)) - IVExitValue = ExitCondition->getOperand(0); - if (!L->isLoopInvariant(IVExitValue)) - return false; - if (!IVBasedValues.count( - ExitCondition->getOperand(IVExitValue == ExitCondition->getOperand(0)))) - return false; - - // If the start value is greater than the exit value and the induction - // variable increments by 1, then we are potentially dealing with an - // infinite loop. Do not index split this loop. - if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue)) - if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue)) - if (SV->getSExtValue() > EV->getSExtValue()) - return false; - - if (processOneIterationLoop()) - return true; - - if (updateLoopIterationSpace()) - return true; - - if (splitLoop()) - return true; - - return false; -} - -// --- Helper routines --- -// isUsedOutsideLoop - Returns true iff V is used outside the loop L. -static bool isUsedOutsideLoop(Value *V, Loop *L) { - for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (!L->contains(cast<Instruction>(*UI))) - return true; - return false; -} - -// Return V+1 -static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt, - LLVMContext &Context) { - Constant *One = ConstantInt::get(V->getType(), 1, Sign); - return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt); -} - -// Return V-1 -static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt, - LLVMContext &Context) { - Constant *One = ConstantInt::get(V->getType(), 1, Sign); - return BinaryOperator::CreateSub(V, One, "lsp", InsertPt); -} - -// Return min(V1, V2) -static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) { - - Value *C = new ICmpInst(InsertPt, - Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - V1, V2, "lsp"); - return SelectInst::Create(C, V1, V2, "lsp", InsertPt); -} - -// Return max(V1, V2) -static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) { - - Value *C = new ICmpInst(InsertPt, - Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - V1, V2, "lsp"); - return SelectInst::Create(C, V2, V1, "lsp", InsertPt); -} - -/// processOneIterationLoop -- Eliminate loop if loop body is executed -/// only once. For example, -/// for (i = 0; i < N; ++i) { -/// if ( i == X) { -/// ... -/// } -/// } -/// -bool LoopIndexSplit::processOneIterationLoop() { - SplitCondition = NULL; - BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *Header = L->getHeader(); - BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator()); - if (!BR) return false; - if (!isa<BranchInst>(Latch->getTerminator())) return false; - if (BR->isUnconditional()) return false; - SplitCondition = dyn_cast<ICmpInst>(BR->getCondition()); - if (!SplitCondition) return false; - if (SplitCondition == ExitCondition) return false; - if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false; - if (BR->getOperand(1) != Latch) return false; - if (!IVBasedValues.count(SplitCondition->getOperand(0)) - && !IVBasedValues.count(SplitCondition->getOperand(1))) - return false; - - // If IV is used outside the loop then this loop traversal is required. - // FIXME: Calculate and use last IV value.
- if (isUsedOutsideLoop(IVIncrement, L)) - return false; - - // If BR operands are not IV or not loop invariants then skip this loop. - Value *OPV = SplitCondition->getOperand(0); - Value *SplitValue = SplitCondition->getOperand(1); - if (!L->isLoopInvariant(SplitValue)) - std::swap(OPV, SplitValue); - if (!L->isLoopInvariant(SplitValue)) - return false; - Instruction *OPI = dyn_cast<Instruction>(OPV); - if (!OPI) - return false; - if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L)) - return false; - Value *StartValue = IVStartValue; - Value *ExitValue = IVExitValue; - - if (OPV != IndVar) { - // If BR operand is IV based then use this operand to calculate - // effective conditions for loop body. - BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV); - if (!BOPV) - return false; - if (BOPV->getOpcode() != Instruction::Add) - return false; - StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR); - ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR); - } - - if (!cleanBlock(Header)) - return false; - - if (!cleanBlock(Latch)) - return false; - - // If the merge point for BR is not loop latch then skip this loop. - if (BR->getSuccessor(0) != Latch) { - DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0)); - assert (DF0 != DF->end() && "Unable to find dominance frontier"); - if (!DF0->second.count(Latch)) - return false; - } - - if (BR->getSuccessor(1) != Latch) { - DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1)); - assert (DF1 != DF->end() && "Unable to find dominance frontier"); - if (!DF1->second.count(Latch)) - return false; - } - - // Now, the current loop L contains a compare instruction that compares - // the induction variable, IndVar, against a loop invariant. The entire - // (i.e. meaningful) loop body is dominated by this compare instruction. - // In such a case, eliminate the loop structure surrounding the loop body. - // For example, - // for (int i = start; i < end; ++i) { - // if ( i == somevalue) { - // loop_body - // } - // } - // can be transformed into - // if (somevalue >= start && somevalue < end) { - // i = somevalue; - // loop_body - // } - - // Replace index variable with split value in loop body. Loop body is executed - // only when index variable is equal to split value. - IndVar->replaceAllUsesWith(SplitValue); - - // Replace split condition in header. - // Transform - // SplitCondition : icmp eq i32 IndVar, SplitValue - // into - // c1 = icmp uge i32 SplitValue, StartValue - // c2 = icmp ult i32 SplitValue, ExitValue - // and i32 c1, c2 - Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ? - ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, - SplitValue, StartValue, "lisplit"); - - CmpInst::Predicate C2P = ExitCondition->getPredicate(); - BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - if (LatchBR->getOperand(1) != Header) - C2P = CmpInst::getInversePredicate(C2P); - Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit"); - Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR); - - SplitCondition->replaceAllUsesWith(NSplitCond); - SplitCondition->eraseFromParent(); - - // Remove Latch to Header edge. - BasicBlock *LatchSucc = NULL; - Header->removePredecessor(Latch); - for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch); - SI != E; ++SI) { - if (Header != *SI) - LatchSucc = *SI; - } - - // Clean up latch block.
- Value *LatchBRCond = LatchBR->getCondition(); - LatchBR->setUnconditionalDest(LatchSucc); - RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond); - - LPM->deleteLoopFromQueue(L); - - // Update Dominator Info. - // Only CFG change done is to remove Latch to Header edge. This - // does not change dominator tree because Latch did not dominate - // Header. - if (DF) { - DominanceFrontier::iterator HeaderDF = DF->find(Header); - if (HeaderDF != DF->end()) - DF->removeFromFrontier(HeaderDF, Header); - - DominanceFrontier::iterator LatchDF = DF->find(Latch); - if (LatchDF != DF->end()) - DF->removeFromFrontier(LatchDF, Header); - } - - ++NumIndexSplitRemoved; - return true; -} - -/// restrictLoopBound - Op dominates loop body. Op compares an IV based value -/// with a loop invariant value. Update loop's lower and upper bound based on -/// the loop invariant value. -bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) { - bool Sign = Op.isSigned(); - Instruction *PHTerm = L->getLoopPreheader()->getTerminator(); - - if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) { - BranchInst *EBR = - cast<BranchInst>(ExitCondition->getParent()->getTerminator()); - ExitCondition->setPredicate(ExitCondition->getInversePredicate()); - BasicBlock *T = EBR->getSuccessor(0); - EBR->setSuccessor(0, EBR->getSuccessor(1)); - EBR->setSuccessor(1, T); - } - - LLVMContext &Context = Op.getContext(); - - // New upper and lower bounds. - Value *NLB = NULL; - Value *NUB = NULL; - if (Value *V = IVisLT(Op)) { - // Restrict upper bound. - if (IVisLE(*ExitCondition)) - V = getMinusOne(V, Sign, PHTerm, Context); - NUB = getMin(V, IVExitValue, Sign, PHTerm); - } else if (Value *V = IVisLE(Op)) { - // Restrict upper bound. - if (IVisLT(*ExitCondition)) - V = getPlusOne(V, Sign, PHTerm, Context); - NUB = getMin(V, IVExitValue, Sign, PHTerm); - } else if (Value *V = IVisGT(Op)) { - // Restrict lower bound. - V = getPlusOne(V, Sign, PHTerm, Context); - NLB = getMax(V, IVStartValue, Sign, PHTerm); - } else if (Value *V = IVisGE(Op)) - // Restrict lower bound. - NLB = getMax(V, IVStartValue, Sign, PHTerm); - - if (!NLB && !NUB) - return false; - - if (NLB) { - unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader()); - IndVar->setIncomingValue(i, NLB); - } - - if (NUB) { - unsigned i = (ExitCondition->getOperand(0) != IVExitValue); - ExitCondition->setOperand(i, NUB); - } - return true; -} - -/// updateLoopIterationSpace -- Update loop's iteration space if loop -/// body is executed for certain IV range only. For example, -/// -/// for (i = 0; i < N; ++i) { -/// if ( i > A && i < B) { -/// ... -/// } -/// } -/// is transformed to iterators from A to B, if A > 0 and B < N. 
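Traced on the example from that comment, with exit condition i < N (an illustrative walk-through of the deleted logic, not new behavior): for the guard "i > A && i < B", IVisGT(i > A) returns A, which getPlusOne turns into A+1, giving the new lower bound NLB = max(A+1, 0); IVisLT(i < B) returns B, and since the exit compare is already a strict "<", no adjustment is applied, giving the new upper bound NUB = min(B, N). The PHI's preheader incoming value is then set to NLB and the exit compare's invariant operand to NUB, i.e. the loop runs for i = max(A+1, 0) up to min(B, N).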
-/// -bool LoopIndexSplit::updateLoopIterationSpace() { - SplitCondition = NULL; - if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE - || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ) - return false; - BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *Header = L->getHeader(); - BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator()); - if (!BR) return false; - if (!isa<BranchInst>(Latch->getTerminator())) return false; - if (BR->isUnconditional()) return false; - BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition()); - if (!AND) return false; - if (AND->getOpcode() != Instruction::And) return false; - ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0)); - ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1)); - if (!Op0 || !Op1) - return false; - IVBasedValues.insert(AND); - IVBasedValues.insert(Op0); - IVBasedValues.insert(Op1); - if (!cleanBlock(Header)) return false; - BasicBlock *ExitingBlock = ExitCondition->getParent(); - if (!cleanBlock(ExitingBlock)) return false; - - // If the merge point for BR is not loop latch then skip this loop. - if (BR->getSuccessor(0) != Latch) { - DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0)); - assert (DF0 != DF->end() && "Unable to find dominance frontier"); - if (!DF0->second.count(Latch)) - return false; - } - - if (BR->getSuccessor(1) != Latch) { - DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1)); - assert (DF1 != DF->end() && "Unable to find dominance frontier"); - if (!DF1->second.count(Latch)) - return false; - } - - // Verify that the loop exiting block has only two predecessors, where one - // pred is the split condition block. The other predecessor will become the - // exiting block's dominator after the CFG is updated. TODO: Handle CFGs where - // the exiting block has more than two predecessors. This requires extra work - // in updating dominator information. - BasicBlock *ExitingBBPred = NULL; - for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock); - PI != PE; ++PI) { - BasicBlock *BB = *PI; - if (Header == BB) - continue; - if (ExitingBBPred) - return false; - else - ExitingBBPred = BB; - } - - if (!restrictLoopBound(*Op0)) - return false; - - if (!restrictLoopBound(*Op1)) - return false; - - // Update CFG. - if (BR->getSuccessor(0) == ExitingBlock) - BR->setUnconditionalDest(BR->getSuccessor(1)); - else - BR->setUnconditionalDest(BR->getSuccessor(0)); - - AND->eraseFromParent(); - if (Op0->use_empty()) - Op0->eraseFromParent(); - if (Op1->use_empty()) - Op1->eraseFromParent(); - - // Update dominator info. Now, ExitingBlock has only one predecessor, - // ExitingBBPred, and it is ExitingBlock's immediate dominator.
- DT->changeImmediateDominator(ExitingBlock, ExitingBBPred); - - BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1); - if (L->contains(ExitBlock)) - ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0); - - // If ExitingBlock is a member of the loop basic blocks' DF list then - // replace ExitingBlock with header and exit block in the DF list - DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock); - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; - if (BB == Header || BB == ExitingBlock) - continue; - DominanceFrontier::iterator BBDF = DF->find(BB); - DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin(); - DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end(); - while (DomSetI != DomSetE) { - DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI; - ++DomSetI; - BasicBlock *DFBB = *CurrentItr; - if (DFBB == ExitingBlock) { - BBDF->second.erase(DFBB); - for (DominanceFrontier::DomSetType::iterator - EBI = ExitingBlockDF->second.begin(), - EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI) - BBDF->second.insert(*EBI); - } - } - } - ++NumRestrictBounds; - return true; -} - -/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB. -/// This routine is used to remove split condition's dead branch, dominated by -/// DeadBB. LiveBB dominates split conidition's other branch. -void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, - BasicBlock *LiveBB) { - - // First update DeadBB's dominance frontier. - SmallVector<BasicBlock *, 8> FrontierBBs; - DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB); - if (DeadBBDF != DF->end()) { - SmallVector<BasicBlock *, 8> PredBlocks; - - DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second; - for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(), - DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI) - { - BasicBlock *FrontierBB = *DeadBBSetI; - FrontierBBs.push_back(FrontierBB); - - // Rremove any PHI incoming edge from blocks dominated by DeadBB. - PredBlocks.clear(); - for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB); - PI != PE; ++PI) { - BasicBlock *P = *PI; - if (DT->dominates(DeadBB, P)) - PredBlocks.push_back(P); - } - - for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end(); - FBI != FBE; ++FBI) { - if (PHINode *PN = dyn_cast<PHINode>(FBI)) { - for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(), - PE = PredBlocks.end(); PI != PE; ++PI) { - BasicBlock *P = *PI; - PN->removeIncomingValue(P); - } - } - else - break; - } - } - } - - // Now remove DeadBB and all nodes dominated by DeadBB in df order. 
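The removal below snapshots the dominated subtree before mutating anything, since erasing blocks while walking the dominator tree would invalidate the traversal. A minimal sketch of that collection step, assuming the 2.9-era DominatorTree and depth-first iterator APIs:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Dominators.h"
using namespace llvm;

// Gather DeadBB and every block it dominates via a depth-first walk over the
// dominator subtree rooted at DeadBB; the caller can then mutate and erase
// the collected blocks freely.
static void collectDominated(DominatorTree &DT, BasicBlock *DeadBB,
                             SmallVectorImpl<BasicBlock *> &Blocks) {
  DomTreeNode *DN = DT.getNode(DeadBB);
  for (df_iterator<DomTreeNode *> DI = df_begin(DN), DE = df_end(DN);
       DI != DE; ++DI)
    Blocks.push_back(DI->getBlock());
}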
- SmallVector<BasicBlock *, 32> WorkList; - DomTreeNode *DN = DT->getNode(DeadBB); - for (df_iterator<DomTreeNode*> DI = df_begin(DN), - E = df_end(DN); DI != E; ++DI) { - BasicBlock *BB = DI->getBlock(); - WorkList.push_back(BB); - BB->replaceAllUsesWith(UndefValue::get( - Type::getLabelTy(DeadBB->getContext()))); - } - - while (!WorkList.empty()) { - BasicBlock *BB = WorkList.pop_back_val(); - LPM->deleteSimpleAnalysisValue(BB, LP); - for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); - BBI != BBE; ) { - Instruction *I = BBI; - ++BBI; - I->replaceAllUsesWith(UndefValue::get(I->getType())); - LPM->deleteSimpleAnalysisValue(I, LP); - I->eraseFromParent(); - } - DT->eraseNode(BB); - DF->removeBlock(BB); - LI->removeBlock(BB); - BB->eraseFromParent(); - } - - // Update Frontier BBs' dominator info. - while (!FrontierBBs.empty()) { - BasicBlock *FBB = FrontierBBs.pop_back_val(); - BasicBlock *NewDominator = FBB->getSinglePredecessor(); - if (!NewDominator) { - pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB); - NewDominator = *PI; - ++PI; - if (NewDominator != LiveBB) { - for(; PI != PE; ++PI) { - BasicBlock *P = *PI; - if (P == LiveBB) { - NewDominator = LiveBB; - break; - } - NewDominator = DT->findNearestCommonDominator(NewDominator, P); - } - } - } - assert (NewDominator && "Unable to fix dominator info."); - DT->changeImmediateDominator(FBB, NewDominator); - DF->changeImmediateDominator(FBB, NewDominator, DT); - } - -} - -// moveExitCondition - Move exit condition EC into split condition block CondBB. -void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB, - BasicBlock *ExitBB, ICmpInst *EC, - ICmpInst *SC, PHINode *IV, - Instruction *IVAdd, Loop *LP, - unsigned ExitValueNum) { - - BasicBlock *ExitingBB = EC->getParent(); - Instruction *CurrentBR = CondBB->getTerminator(); - - // Move exit condition into split condition block. - EC->moveBefore(CurrentBR); - EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV); - - // Move exiting block's branch into split condition block. Update its branch - // destination. - BranchInst *ExitingBR = cast<BranchInst>(ExitingBB->getTerminator()); - ExitingBR->moveBefore(CurrentBR); - BasicBlock *OrigDestBB = NULL; - if (ExitingBR->getSuccessor(0) == ExitBB) { - OrigDestBB = ExitingBR->getSuccessor(1); - ExitingBR->setSuccessor(1, ActiveBB); - } - else { - OrigDestBB = ExitingBR->getSuccessor(0); - ExitingBR->setSuccessor(0, ActiveBB); - } - - // Remove split condition and current split condition branch. - SC->eraseFromParent(); - CurrentBR->eraseFromParent(); - - // Connect exiting block to original destination. - BranchInst::Create(OrigDestBB, ExitingBB); - - // Update PHINodes - updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP); - - // Fix dominator info. - // ExitBB is now dominated by CondBB - DT->changeImmediateDominator(ExitBB, CondBB); - DF->changeImmediateDominator(ExitBB, CondBB, DT); - - // Blocks outside the loop may have been in the dominance frontier of blocks - // inside the condition; this is now impossible because the blocks inside the - // condition no loger dominate the exit. Remove the relevant blocks from - // the dominance frontiers. 
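The frontier cleanup below erases from a set while iterating it, so the iterator is advanced before each erase. The same idiom over a plain std::set (the pass erases by value rather than by iterator, with the same effect):

#include <set>

// Erase elements from a set in place: copy the iterator, advance the live
// one, then erase through the copy so iteration is never invalidated.
static void eraseNegatives(std::set<int> &S) {
  for (std::set<int>::iterator I = S.begin(), E = S.end(); I != E; ) {
    std::set<int>::iterator Cur = I++;
    if (*Cur < 0)
      S.erase(Cur);
  }
}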
- for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end(); - I != E; ++I) { - if (!DT->properlyDominates(CondBB, *I)) continue; - DominanceFrontier::iterator BBDF = DF->find(*I); - DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin(); - DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end(); - while (DomSetI != DomSetE) { - DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI; - ++DomSetI; - BasicBlock *DFBB = *CurrentItr; - if (!LP->contains(DFBB)) - BBDF->second.erase(DFBB); - } - } -} - -/// updatePHINodes - CFG has been changed. -/// Before -/// - ExitBB's single predecessor was Latch -/// - Latch's second successor was Header -/// Now -/// - ExitBB's single predecessor is Header -/// - Latch's one and only successor is Header -/// -/// Update ExitBB PHINodes' to reflect this change. -void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, - BasicBlock *Header, - PHINode *IV, Instruction *IVIncrement, - Loop *LP) { - - for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end(); - BI != BE; ) { - PHINode *PN = dyn_cast<PHINode>(BI); - ++BI; - if (!PN) - break; - - Value *V = PN->getIncomingValueForBlock(Latch); - if (PHINode *PHV = dyn_cast<PHINode>(V)) { - // PHV is in Latch. PHV has one use is in ExitBB PHINode. And one use - // in Header which is new incoming value for PN. - Value *NewV = NULL; - for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end(); - UI != E; ++UI) - if (PHINode *U = dyn_cast<PHINode>(*UI)) - if (LP->contains(U)) { - NewV = U; - break; - } - - // Add incoming value from header only if PN has any use inside the loop. - if (NewV) - PN->addIncoming(NewV, Header); - - } else if (Instruction *PHI = dyn_cast<Instruction>(V)) { - // If this instruction is IVIncrement then IV is new incoming value - // from header otherwise this instruction must be incoming value from - // header because loop is in LCSSA form. - if (PHI == IVIncrement) - PN->addIncoming(IV, Header); - else - PN->addIncoming(V, Header); - } else - // Otherwise this is an incoming value from header because loop is in - // LCSSA form. - PN->addIncoming(V, Header); - - // Remove incoming value from Latch. - PN->removeIncomingValue(Latch); - } -} - -bool LoopIndexSplit::splitLoop() { - SplitCondition = NULL; - if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE - || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ) - return false; - BasicBlock *Header = L->getHeader(); - BasicBlock *Latch = L->getLoopLatch(); - BranchInst *SBR = NULL; // Split Condition Branch - BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator()); - // If Exiting block includes loop variant instructions then this - // loop may not be split safely. - BasicBlock *ExitingBlock = ExitCondition->getParent(); - if (!cleanBlock(ExitingBlock)) return false; - - LLVMContext &Context = Header->getContext(); - - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator()); - if (!BR || BR->isUnconditional()) continue; - ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition()); - if (!CI || CI == ExitCondition - || CI->getPredicate() == ICmpInst::ICMP_NE - || CI->getPredicate() == ICmpInst::ICMP_EQ) - continue; - - // Unable to handle triangle loops at the moment. - // In triangle loop, split condition is in header and one of the - // the split destination is loop latch. 
If split condition is EQ - // then such loops are already handle in processOneIterationLoop(). - if (Header == (*I) - && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1))) - continue; - - // If the block does not dominate the latch then this is not a diamond. - // Such loop may not benefit from index split. - if (!DT->dominates((*I), Latch)) - continue; - - // If split condition branches heads do not have single predecessor, - // SplitCondBlock, then is not possible to remove inactive branch. - if (!BR->getSuccessor(0)->getSinglePredecessor() - || !BR->getSuccessor(1)->getSinglePredecessor()) - return false; - - // If the merge point for BR is not loop latch then skip this condition. - if (BR->getSuccessor(0) != Latch) { - DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0)); - assert (DF0 != DF->end() && "Unable to find dominance frontier"); - if (!DF0->second.count(Latch)) - continue; - } - - if (BR->getSuccessor(1) != Latch) { - DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1)); - assert (DF1 != DF->end() && "Unable to find dominance frontier"); - if (!DF1->second.count(Latch)) - continue; - } - SplitCondition = CI; - SBR = BR; - break; - } - - if (!SplitCondition) - return false; - - // If the predicate sign does not match then skip. - if (ExitCondition->isSigned() != SplitCondition->isSigned()) - return false; - - unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue); - unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0)); - Value *SplitValue = SplitCondition->getOperand(SVOpNum); - if (!L->isLoopInvariant(SplitValue)) - return false; - if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum))) - return false; - - // Check for side effects. - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; - - assert(DT->dominates(Header, BB)); - if (DT->properlyDominates(SplitCondition->getParent(), BB)) - continue; - - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { - Instruction *Inst = BI; - - if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst) - && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) - return false; - } - } - - // Normalize loop conditions so that it is easier to calculate new loop - // bounds. - if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) { - ExitCondition->setPredicate(ExitCondition->getInversePredicate()); - BasicBlock *T = EBR->getSuccessor(0); - EBR->setSuccessor(0, EBR->getSuccessor(1)); - EBR->setSuccessor(1, T); - } - - if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) { - SplitCondition->setPredicate(SplitCondition->getInversePredicate()); - BasicBlock *T = SBR->getSuccessor(0); - SBR->setSuccessor(0, SBR->getSuccessor(1)); - SBR->setSuccessor(1, T); - } - - //[*] Calculate new loop bounds. 
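One worked instance of the case analysis that follows, assuming a signed IV, an exit condition of i < N, and a split condition of i <= S (the helper is illustrative only):

#include <algorithm>
#include <cstdint>
#include <utility>

// IVisLT(exit) with IVisLE(split) yields AEV = BSV = S + 1, clamped to the
// original range: loop A covers i in [Start, min(S + 1, N)) and loop B
// covers i in [max(S + 1, Start), N), together the original iterations.
std::pair<int64_t, int64_t> splitBounds(int64_t Start, int64_t N, int64_t S) {
  int64_t AEV = std::min(S + 1, N);     // loop A's new exit value
  int64_t BSV = std::max(S + 1, Start); // loop B's new start value
  return std::make_pair(AEV, BSV);
}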
- Value *AEV = SplitValue; - Value *BSV = SplitValue; - bool Sign = SplitCondition->isSigned(); - Instruction *PHTerm = L->getLoopPreheader()->getTerminator(); - - if (IVisLT(*ExitCondition)) { - if (IVisLT(*SplitCondition)) { - /* Do nothing */ - } - else if (IVisLE(*SplitCondition)) { - AEV = getPlusOne(SplitValue, Sign, PHTerm, Context); - BSV = getPlusOne(SplitValue, Sign, PHTerm, Context); - } else { - assert (0 && "Unexpected split condition!"); - } - } - else if (IVisLE(*ExitCondition)) { - if (IVisLT(*SplitCondition)) { - AEV = getMinusOne(SplitValue, Sign, PHTerm, Context); - } - else if (IVisLE(*SplitCondition)) { - BSV = getPlusOne(SplitValue, Sign, PHTerm, Context); - } else { - assert (0 && "Unexpected split condition!"); - } - } else { - assert (0 && "Unexpected exit condition!"); - } - AEV = getMin(AEV, IVExitValue, Sign, PHTerm); - BSV = getMax(BSV, IVStartValue, Sign, PHTerm); - - // [*] Clone Loop - ValueMap<const Value *, Value *> VMap; - Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this); - Loop *ALoop = L; - - // [*] ALoop's exiting edge enters BLoop's header. - // ALoop's original exit block becomes BLoop's exit block. - PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]); - BasicBlock *A_ExitingBlock = ExitCondition->getParent(); - BranchInst *A_ExitInsn = - dyn_cast<BranchInst>(A_ExitingBlock->getTerminator()); - assert (A_ExitInsn && "Unable to find suitable loop exit branch"); - BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1); - BasicBlock *B_Header = BLoop->getHeader(); - if (ALoop->contains(B_ExitBlock)) { - B_ExitBlock = A_ExitInsn->getSuccessor(0); - A_ExitInsn->setSuccessor(0, B_Header); - } else - A_ExitInsn->setSuccessor(1, B_Header); - - // [*] Update ALoop's exit value using new exit value. - ExitCondition->setOperand(EVOpNum, AEV); - - // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from - // original loop's preheader. Add incoming PHINode values from - // ALoop's exiting block. Update BLoop header's domiantor info. - - // Collect inverse map of Header PHINodes. - DenseMap<Value *, Value *> InverseMap; - for (BasicBlock::iterator BI = ALoop->getHeader()->begin(), - BE = ALoop->getHeader()->end(); BI != BE; ++BI) { - if (PHINode *PN = dyn_cast<PHINode>(BI)) { - PHINode *PNClone = cast<PHINode>(VMap[PN]); - InverseMap[PNClone] = PN; - } else - break; - } - - BasicBlock *A_Preheader = ALoop->getLoopPreheader(); - for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end(); - BI != BE; ++BI) { - if (PHINode *PN = dyn_cast<PHINode>(BI)) { - // Remove incoming value from original preheader. - PN->removeIncomingValue(A_Preheader); - - // Add incoming value from A_ExitingBlock. - if (PN == B_IndVar) - PN->addIncoming(BSV, A_ExitingBlock); - else { - PHINode *OrigPN = cast<PHINode>(InverseMap[PN]); - Value *V2 = NULL; - // If loop header is also loop exiting block then - // OrigPN is incoming value for B loop header. - if (A_ExitingBlock == ALoop->getHeader()) - V2 = OrigPN; - else - V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock); - PN->addIncoming(V2, A_ExitingBlock); - } - } else - break; - } - - DT->changeImmediateDominator(B_Header, A_ExitingBlock); - DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT); - - // [*] Update BLoop's exit block. Its new predecessor is BLoop's exit - // block. Remove incoming PHINode values from ALoop's exiting block. - // Add new incoming values from BLoop's incoming exiting value. - // Update BLoop exit block's dominator info.. 
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]); - for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end(); - BI != BE; ++BI) { - if (PHINode *PN = dyn_cast<PHINode>(BI)) { - PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)], - B_ExitingBlock); - PN->removeIncomingValue(A_ExitingBlock); - } else - break; - } - - DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock); - DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT); - - //[*] Split ALoop's exit edge. This creates a new block which - // serves two purposes. First one is to hold PHINode defnitions - // to ensure that ALoop's LCSSA form. Second use it to act - // as a preheader for BLoop. - BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this); - - //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes - // in A_ExitBlock to redefine outgoing PHI definitions from ALoop. - for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end(); - BI != BE; ++BI) { - if (PHINode *PN = dyn_cast<PHINode>(BI)) { - Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock); - PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName()); - newPHI->addIncoming(V1, A_ExitingBlock); - A_ExitBlock->getInstList().push_front(newPHI); - PN->removeIncomingValue(A_ExitBlock); - PN->addIncoming(newPHI, A_ExitBlock); - } else - break; - } - - //[*] Eliminate split condition's inactive branch from ALoop. - BasicBlock *A_SplitCondBlock = SplitCondition->getParent(); - BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator()); - BasicBlock *A_InactiveBranch = NULL; - BasicBlock *A_ActiveBranch = NULL; - A_ActiveBranch = A_BR->getSuccessor(0); - A_InactiveBranch = A_BR->getSuccessor(1); - A_BR->setUnconditionalDest(A_ActiveBranch); - removeBlocks(A_InactiveBranch, L, A_ActiveBranch); - - //[*] Eliminate split condition's inactive branch in from BLoop. - BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]); - BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator()); - BasicBlock *B_InactiveBranch = NULL; - BasicBlock *B_ActiveBranch = NULL; - B_ActiveBranch = B_BR->getSuccessor(1); - B_InactiveBranch = B_BR->getSuccessor(0); - B_BR->setUnconditionalDest(B_ActiveBranch); - removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch); - - BasicBlock *A_Header = ALoop->getHeader(); - if (A_ExitingBlock == A_Header) - return true; - - //[*] Move exit condition into split condition block to avoid - // executing dead loop iteration. - ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]); - Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]); - ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]); - - moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition, - cast<ICmpInst>(SplitCondition), IndVar, IVIncrement, - ALoop, EVOpNum); - - moveExitCondition(B_SplitCondBlock, B_ActiveBranch, - B_ExitBlock, B_ExitCondition, - B_SplitCondition, B_IndVar, B_IndVarIncrement, - BLoop, EVOpNum); - - ++NumIndexSplit; - return true; -} - -/// cleanBlock - A block is considered clean if all non terminal instructions -/// are either, PHINodes, IV based. 
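Part of the test below asks whether an instruction's uses escape its block. In isolation, and assuming the 2.9-era use-iterator API, that check looks like this (the helper name is hypothetical):

#include "llvm/Instruction.h"
using namespace llvm;

// An instruction only obstructs splitting if some user lives in a different
// basic block; values that stay local to the block are harmless.
static bool usedOutsideBlock(Instruction *I, BasicBlock *BB) {
  for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
       UI != UE; ++UI)
    if (cast<Instruction>(*UI)->getParent() != BB)
      return true;
  return false;
}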
-bool LoopIndexSplit::cleanBlock(BasicBlock *BB) { - Instruction *Terminator = BB->getTerminator(); - for(BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { - Instruction *I = BI; - - if (isa<PHINode>(I) || I == Terminator || I == ExitCondition - || I == SplitCondition || IVBasedValues.count(I) - || isa<DbgInfoIntrinsic>(I)) - continue; - - if (I->mayHaveSideEffects()) - return false; - - // I is used only inside this block then it is OK. - bool usedOutsideBB = false; - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - Instruction *U = cast<Instruction>(*UI); - if (U->getParent() != BB) - usedOutsideBB = true; - } - if (!usedOutsideBB) - continue; - - // Otherwise we have a instruction that may not allow loop spliting. - return false; - } - return true; -} - -/// IVisLT - If Op is comparing IV based value with an loop invariant and -/// IV based value is less than the loop invariant then return the loop -/// invariant. Otherwise return NULL. -Value * LoopIndexSplit::IVisLT(ICmpInst &Op) { - ICmpInst::Predicate P = Op.getPredicate(); - if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT) - && IVBasedValues.count(Op.getOperand(0)) - && L->isLoopInvariant(Op.getOperand(1))) - return Op.getOperand(1); - - if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT) - && IVBasedValues.count(Op.getOperand(1)) - && L->isLoopInvariant(Op.getOperand(0))) - return Op.getOperand(0); - - return NULL; -} - -/// IVisLE - If Op is comparing IV based value with an loop invariant and -/// IV based value is less than or equal to the loop invariant then -/// return the loop invariant. Otherwise return NULL. -Value * LoopIndexSplit::IVisLE(ICmpInst &Op) { - ICmpInst::Predicate P = Op.getPredicate(); - if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE) - && IVBasedValues.count(Op.getOperand(0)) - && L->isLoopInvariant(Op.getOperand(1))) - return Op.getOperand(1); - - if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE) - && IVBasedValues.count(Op.getOperand(1)) - && L->isLoopInvariant(Op.getOperand(0))) - return Op.getOperand(0); - - return NULL; -} - -/// IVisGT - If Op is comparing IV based value with an loop invariant and -/// IV based value is greater than the loop invariant then return the loop -/// invariant. Otherwise return NULL. -Value * LoopIndexSplit::IVisGT(ICmpInst &Op) { - ICmpInst::Predicate P = Op.getPredicate(); - if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT) - && IVBasedValues.count(Op.getOperand(0)) - && L->isLoopInvariant(Op.getOperand(1))) - return Op.getOperand(1); - - if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT) - && IVBasedValues.count(Op.getOperand(1)) - && L->isLoopInvariant(Op.getOperand(0))) - return Op.getOperand(0); - - return NULL; -} - -/// IVisGE - If Op is comparing IV based value with an loop invariant and -/// IV based value is greater than or equal to the loop invariant then -/// return the loop invariant. Otherwise return NULL. 
-Value * LoopIndexSplit::IVisGE(ICmpInst &Op) { - ICmpInst::Predicate P = Op.getPredicate(); - if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE) - && IVBasedValues.count(Op.getOperand(0)) - && L->isLoopInvariant(Op.getOperand(1))) - return Op.getOperand(1); - - if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE) - && IVBasedValues.count(Op.getOperand(1)) - && L->isLoopInvariant(Op.getOperand(0))) - return Op.getOperand(0); - - return NULL; -} - diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp new file mode 100644 index 0000000..af25c5c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -0,0 +1,170 @@ +//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs lightweight instruction simplification on loop bodies. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-instsimplify" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumSimplified, "Number of redundant instructions simplified"); + +namespace { + class LoopInstSimplify : public LoopPass { + public: + static char ID; // Pass ID, replacement for typeid + LoopInstSimplify() : LoopPass(ID) { + initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop*, LPPassManager&); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<LoopInfo>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addPreservedID(LCSSAID); + AU.addPreserved("scalar-evolution"); + } + }; +} + +char LoopInstSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify", + "Simplify instructions in loops", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify", + "Simplify instructions in loops", false, false) + +Pass *llvm::createLoopInstSimplifyPass() { + return new LoopInstSimplify(); +} + +bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { + DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); + LoopInfo *LI = &getAnalysis<LoopInfo>(); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + array_pod_sort(ExitBlocks.begin(), ExitBlocks.end()); + + SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; + + // The bit we are stealing from the pointer represents whether this basic + // block is the header of a subloop, in which case we only process its phis. 
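PointerIntPair stores the flag in the low, alignment-guaranteed bits of the pointer itself, so each worklist entry stays one word wide. A small self-contained sketch with a stand-in block type:

#include "llvm/ADT/PointerIntPair.h"
using namespace llvm;

struct Block { int Dummy; }; // stand-in for BasicBlock in this sketch

// One worklist entry: a block pointer plus a one-bit "subloop header" flag
// packed into the pointer's spare low bit.
typedef PointerIntPair<Block *, 1, bool> WorklistItem;

bool isSubloopHeader(WorklistItem Item) {
  return Item.getInt(); // the stolen bit; Item.getPointer() is the block
}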
+ typedef PointerIntPair<BasicBlock*, 1> WorklistItem; + SmallVector<WorklistItem, 16> VisitStack; + SmallPtrSet<BasicBlock*, 32> Visited; + + bool Changed = false; + bool LocalChanged; + do { + LocalChanged = false; + + VisitStack.clear(); + Visited.clear(); + + VisitStack.push_back(WorklistItem(L->getHeader(), false)); + + while (!VisitStack.empty()) { + WorklistItem Item = VisitStack.pop_back_val(); + BasicBlock *BB = Item.getPointer(); + bool IsSubloopHeader = Item.getInt(); + + // Simplify instructions in the current basic block. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { + Instruction *I = BI++; + + // The first time through the loop ToSimplify is empty and we try to + // simplify all instructions. On later iterations ToSimplify is not + // empty and we only bother simplifying instructions that are in it. + if (!ToSimplify->empty() && !ToSimplify->count(I)) + continue; + + // Don't bother simplifying unused instructions. + if (!I->use_empty()) { + Value *V = SimplifyInstruction(I, TD, DT); + if (V && LI->replacementPreservesLCSSAForm(I, V)) { + // Mark all uses for resimplification next time round the loop. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Next->insert(cast<Instruction>(*UI)); + + I->replaceAllUsesWith(V); + LocalChanged = true; + ++NumSimplified; + } + } + LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I); + + if (IsSubloopHeader && !isa<PHINode>(I)) + break; + } + + // Add all successors to the worklist, except for loop exit blocks and the + // bodies of subloops. We visit the headers of loops so that we can process + // their phis, but we contract the rest of the subloop body and only follow + // edges leading back to the original loop. + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; + ++SI) { + BasicBlock *SuccBB = *SI; + if (!Visited.insert(SuccBB)) + continue; + + const Loop *SuccLoop = LI->getLoopFor(SuccBB); + if (SuccLoop && SuccLoop->getHeader() == SuccBB + && L->contains(SuccLoop)) { + VisitStack.push_back(WorklistItem(SuccBB, true)); + + SmallVector<BasicBlock*, 8> SubLoopExitBlocks; + SuccLoop->getExitBlocks(SubLoopExitBlocks); + + for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) { + BasicBlock *ExitBB = SubLoopExitBlocks[i]; + if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB)) + VisitStack.push_back(WorklistItem(ExitBB, false)); + } + + continue; + } + + bool IsExitBlock = std::binary_search(ExitBlocks.begin(), + ExitBlocks.end(), SuccBB); + if (IsExitBlock) + continue; + + VisitStack.push_back(WorklistItem(SuccBB, false)); + } + } + + // Place the list of instructions to simplify on the next loop iteration + // into ToSimplify. 
+ std::swap(ToSimplify, Next); + Next->clear(); + + Changed |= LocalChanged; + } while (LocalChanged); + + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 65acc1d..95e1578 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -15,16 +15,16 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; #define MAX_HEADER_SIZE 16 @@ -35,16 +35,13 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(ID) {} - - // Rotate Loop L as many times as possible. Return true if - // loop is rotated at least once. - bool runOnLoop(Loop *L, LPPassManager &LPM); + LoopRotate() : LoopPass(ID) { + initializeLoopRotatePass(*PassRegistry::getPassRegistry()); + } // LCSSA form makes instruction renaming easier. virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); AU.addRequired<LoopInfo>(); AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); @@ -54,79 +51,119 @@ namespace { AU.addPreserved<ScalarEvolution>(); } - // Helper functions - - /// Do actual work - bool rotateLoop(Loop *L, LPPassManager &LPM); + bool runOnLoop(Loop *L, LPPassManager &LPM); + bool rotateLoop(Loop *L); - /// Initialize local data - void initialize(); - - /// After loop rotation, loop pre-header has multiple sucessors. - /// Insert one forwarding basic block to ensure that loop pre-header - /// has only one successor. - void preserveCanonicalLoopForm(LPPassManager &LPM); - private: - Loop *L; - BasicBlock *OrigHeader; - BasicBlock *OrigPreHeader; - BasicBlock *OrigLatch; - BasicBlock *NewHeader; - BasicBlock *Exit; - LPPassManager *LPM_Ptr; + LoopInfo *LI; }; } char LoopRotate::ID = 0; -INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false); +INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } /// Rotate Loop L as many times as possible. Return true if /// the loop is rotated at least once. -bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) { - - bool RotatedOneLoop = false; - initialize(); - LPM_Ptr = &LPM; +bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { + LI = &getAnalysis<LoopInfo>(); // One loop can be rotated multiple times. - while (rotateLoop(Lp,LPM)) { - RotatedOneLoop = true; - initialize(); - } + bool MadeChange = false; + while (rotateLoop(L)) + MadeChange = true; - return RotatedOneLoop; + return MadeChange; } -/// Rotate loop LP. Return true if the loop is rotated. 
-bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { - L = Lp; - - OrigPreHeader = L->getLoopPreheader(); - if (!OrigPreHeader) return false; - - OrigLatch = L->getLoopLatch(); - if (!OrigLatch) return false; +/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the +/// old header into the preheader. If there were uses of the values produced by +/// these instruction that were outside of the loop, we have to insert PHI nodes +/// to merge the two values. Do this now. +static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, + BasicBlock *OrigPreheader, + ValueToValueMapTy &ValueMap) { + // Remove PHI node entries that are no longer live. + BasicBlock::iterator I, E = OrigHeader->end(); + for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) + PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); + + // Now fix up users of the instructions in OrigHeader, inserting PHI nodes + // as necessary. + SSAUpdater SSA; + for (I = OrigHeader->begin(); I != E; ++I) { + Value *OrigHeaderVal = I; + + // If there are no uses of the value (e.g. because it returns void), there + // is nothing to rewrite. + if (OrigHeaderVal->use_empty()) + continue; + + Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal]; - OrigHeader = L->getHeader(); + // The value now exits in two versions: the initial value in the preheader + // and the loop "next" value in the original header. + SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); + SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); + SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); + + // Visit each use of the OrigHeader instruction. + for (Value::use_iterator UI = OrigHeaderVal->use_begin(), + UE = OrigHeaderVal->use_end(); UI != UE; ) { + // Grab the use before incrementing the iterator. + Use &U = UI.getUse(); + + // Increment the iterator before removing the use from the list. + ++UI; + + // SSAUpdater can't handle a non-PHI use in the same block as an + // earlier def. We can easily handle those cases manually. + Instruction *UserInst = cast<Instruction>(U.getUser()); + if (!isa<PHINode>(UserInst)) { + BasicBlock *UserBB = UserInst->getParent(); + + // The original users in the OrigHeader are already using the + // original definitions. + if (UserBB == OrigHeader) + continue; + + // Users in the OrigPreHeader need to use the value to which the + // original definitions are mapped. + if (UserBB == OrigPreheader) { + U = OrigPreHeaderVal; + continue; + } + } + + // Anything else can be handled by SSAUpdater. + SSA.RewriteUse(U); + } + } +} +/// Rotate loop LP. Return true if the loop is rotated. +bool LoopRotate::rotateLoop(Loop *L) { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; - + + BasicBlock *OrigHeader = L->getHeader(); + + BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); + if (BI == 0 || BI->isUnconditional()) + return false; + // If the loop header is not one of the loop exiting blocks then // either this loop is already rotated or it is not // suitable for loop rotation transformations. if (!L->isLoopExiting(OrigHeader)) return false; - BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); - if (!BI) - return false; - assert(BI->isConditional() && "Branch Instruction is not conditional"); - // Updating PHInodes in loops with multiple exits adds complexity. // Keep it simple, and restrict loop rotation to loops with one exit only. 
// In future, lift this restriction and support for multiple exits if @@ -136,24 +173,18 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { if (ExitBlocks.size() > 1) return false; - // Check size of original header and reject - // loop if it is very big. - unsigned Size = 0; - - // FIXME: Use common api to estimate size. - for (BasicBlock::const_iterator OI = OrigHeader->begin(), - OE = OrigHeader->end(); OI != OE; ++OI) { - if (isa<PHINode>(OI)) - continue; // PHI nodes don't count. - if (isa<DbgInfoIntrinsic>(OI)) - continue; // Debug intrinsics don't count as size. - ++Size; + // Check size of original header and reject loop if it is very big. + { + CodeMetrics Metrics; + Metrics.analyzeBasicBlock(OrigHeader); + if (Metrics.NumInsts > MAX_HEADER_SIZE) + return false; } - if (Size > MAX_HEADER_SIZE) - return false; - // Now, this loop is suitable for rotation. + BasicBlock *OrigPreheader = L->getLoopPreheader(); + BasicBlock *OrigLatch = L->getLoopLatch(); + assert(OrigPreheader && OrigLatch && "Loop not in canonical form?"); // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. @@ -163,8 +194,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. - Exit = BI->getSuccessor(0); - NewHeader = BI->getSuccessor(1); + BasicBlock *Exit = BI->getSuccessor(0); + BasicBlock *NewHeader = BI->getSuccessor(1); if (L->contains(Exit)) std::swap(Exit, NewHeader); assert(NewHeader && "Unable to determine new loop header"); @@ -180,20 +211,54 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // Begin by walking OrigHeader and populating ValueMap with an entry for // each Instruction. BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); - DenseMap<const Value *, Value *> ValueMap; + ValueToValueMapTy ValueMap; // For PHI nodes, the value available in OldPreHeader is just the // incoming value from OldPreHeader. for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) - ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader)); + ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); - // For the rest of the instructions, create a clone in the OldPreHeader. - TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator(); - for (; I != E; ++I) { - Instruction *C = I->clone(); - C->setName(I->getName()); - C->insertBefore(LoopEntryBranch); - ValueMap[I] = C; + // For the rest of the instructions, either hoist to the OrigPreheader if + // possible or create a clone in the OldPreHeader if not. + TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); + while (I != E) { + Instruction *Inst = I++; + + // If the instruction's operands are invariant and it doesn't read or write + // memory, then it is safe to hoist. Doing this doesn't change the order of + // execution in the preheader, but does prevent the instruction from + // executing in each iteration of the loop. This means it is safe to hoist + // something that might trap, but isn't safe to hoist something that reads + // memory (without proving that the loop doesn't write). 
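The hoisting test described above, restated as a standalone predicate (the name is hypothetical): every operand invariant, no memory access, and neither a terminator nor a debug intrinsic. Trapping instructions are fine because the moved instruction still executes exactly once on entry.

#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

// Moving such an instruction to the preheader preserves behavior: it no
// longer re-executes on every iteration, but still runs before the loop.
static bool canHoistIntoPreheader(Loop *L, Instruction *Inst) {
  return L->hasLoopInvariantOperands(Inst) &&
         !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
         !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst);
}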
+ if (L->hasLoopInvariantOperands(Inst) && + !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() && + !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) { + Inst->moveBefore(LoopEntryBranch); + continue; + } + + // Otherwise, create a duplicate of the instruction. + Instruction *C = Inst->clone(); + + // Eagerly remap the operands of the instruction. + RemapInstruction(C, ValueMap, + RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); + + // With the operands remapped, see if the instruction constant folds or is + // otherwise simplifyable. This commonly occurs because the entry from PHI + // nodes allows icmps and other instructions to fold. + Value *V = SimplifyInstruction(C); + if (V && LI->replacementPreservesLCSSAForm(C, V)) { + // If so, then delete the temporary instruction and stick the folded value + // in the map. + delete C; + ValueMap[Inst] = V; + } else { + // Otherwise, stick the new instruction into the new block! + C->setName(Inst->getName()); + C->insertBefore(LoopEntryBranch); + ValueMap[Inst] = C; + } } // Along with all the other instructions, we just cloned OrigHeader's @@ -203,221 +268,81 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin(); PHINode *PN = dyn_cast<PHINode>(BI); ++BI) - PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader); + PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove // OrigPreHeader's old terminator (the original branch into the loop), and // remove the corresponding incoming values from the PHI nodes in OrigHeader. LoopEntryBranch->eraseFromParent(); - for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) - PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader)); - // Now fix up users of the instructions in OrigHeader, inserting PHI nodes - // as necessary. - SSAUpdater SSA; - for (I = OrigHeader->begin(); I != E; ++I) { - Value *OrigHeaderVal = I; - Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal]; - - // The value now exits in two versions: the initial value in the preheader - // and the loop "next" value in the original header. - SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); - SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); - SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal); - - // Visit each use of the OrigHeader instruction. - for (Value::use_iterator UI = OrigHeaderVal->use_begin(), - UE = OrigHeaderVal->use_end(); UI != UE; ) { - // Grab the use before incrementing the iterator. - Use &U = UI.getUse(); - - // Increment the iterator before removing the use from the list. - ++UI; - - // SSAUpdater can't handle a non-PHI use in the same block as an - // earlier def. We can easily handle those cases manually. - Instruction *UserInst = cast<Instruction>(U.getUser()); - if (!isa<PHINode>(UserInst)) { - BasicBlock *UserBB = UserInst->getParent(); - - // The original users in the OrigHeader are already using the - // original definitions. - if (UserBB == OrigHeader) - continue; - - // Users in the OrigPreHeader need to use the value to which the - // original definitions are mapped. - if (UserBB == OrigPreHeader) { - U = OrigPreHeaderVal; - continue; - } - } - - // Anything else can be handled by SSAUpdater. 
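A condensed view of the SSAUpdater protocol this rewriting relies on, assuming the 2.9-era API: declare the value once, register the reaching definition in each block, and RewriteUse inserts whatever PHI nodes the CFG requires.

#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Value.h"
using namespace llvm;

// Rewrite one use of a value that now exists in two versions; SSAUpdater
// materializes PHI nodes at join points on demand.
static void rewriteAcrossVersions(Value *HeaderVal, Value *PreheaderVal,
                                  BasicBlock *Header, BasicBlock *Preheader,
                                  Use &U) {
  SSAUpdater SSA;
  SSA.Initialize(HeaderVal->getType(), HeaderVal->getName());
  SSA.AddAvailableValue(Header, HeaderVal);
  SSA.AddAvailableValue(Preheader, PreheaderVal);
  SSA.RewriteUse(U);
}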
- SSA.RewriteUse(U); - } - } + // If there were any uses of instructions in the duplicated block outside the + // loop, update them, inserting PHI nodes as required + RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap); // NewHeader is now the header of the loop. L->moveToHeader(NewHeader); + assert(L->getHeader() == NewHeader && "Latch block is our new header"); - // Move the original header to the bottom of the loop, where it now more - // naturally belongs. This isn't necessary for correctness, and CodeGen can - // usually reorder blocks on its own to fix things like this up, but it's - // still nice to keep the IR readable. - // - // The original header should have only one predecessor at this point, since - // we checked that the loop had a proper preheader and unique backedge before - // we started. - assert(OrigHeader->getSinglePredecessor() && - "Original loop header has too many predecessors after loop rotation!"); - OrigHeader->moveAfter(OrigHeader->getSinglePredecessor()); - - // Also, since this original header only has one predecessor, zap its - // PHI nodes, which are now trivial. - FoldSingleEntryPHINodes(OrigHeader); - - // TODO: We could just go ahead and merge OrigHeader into its predecessor - // at this point, if we don't mind updating dominator info. - - // Establish a new preheader, update dominators, etc. - preserveCanonicalLoopForm(LPM); - - ++NumRotated; - return true; -} - -/// Initialize local data -void LoopRotate::initialize() { - L = NULL; - OrigHeader = NULL; - OrigPreHeader = NULL; - NewHeader = NULL; - Exit = NULL; -} - -/// After loop rotation, loop pre-header has multiple sucessors. -/// Insert one forwarding basic block to ensure that loop pre-header -/// has only one successor. -void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { - - // Right now original pre-header has two successors, new header and - // exit block. Insert new block between original pre-header and - // new header such that loop's new pre-header has only one successor. 
- BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(), - "bb.nph", - OrigHeader->getParent(), - NewHeader); - LoopInfo &LI = getAnalysis<LoopInfo>(); - if (Loop *PL = LI.getLoopFor(OrigPreHeader)) - PL->addBasicBlockToLoop(NewPreHeader, LI.getBase()); - BranchInst::Create(NewHeader, NewPreHeader); - BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator()); - if (OrigPH_BI->getSuccessor(0) == NewHeader) - OrigPH_BI->setSuccessor(0, NewPreHeader); - else { - assert(OrigPH_BI->getSuccessor(1) == NewHeader && - "Unexpected original pre-header terminator"); - OrigPH_BI->setSuccessor(1, NewPreHeader); - } - - PHINode *PN; - for (BasicBlock::iterator I = NewHeader->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { - int index = PN->getBasicBlockIndex(OrigPreHeader); - assert(index != -1 && "Expected incoming value from Original PreHeader"); - PN->setIncomingBlock(index, NewPreHeader); - assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 && - "Expected only one incoming value from Original PreHeader"); - } - - if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { - DT->addNewBlock(NewPreHeader, OrigPreHeader); - DT->changeImmediateDominator(L->getHeader(), NewPreHeader); - DT->changeImmediateDominator(Exit, OrigPreHeader); - for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - BasicBlock *B = *BI; - if (L->getHeader() != B) { - DomTreeNode *Node = DT->getNode(B); - if (Node && Node->getBlock() == OrigHeader) - DT->changeImmediateDominator(*BI, L->getHeader()); - } - } - DT->changeImmediateDominator(OrigHeader, OrigLatch); - } - - if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) { - // New Preheader's dominance frontier is Exit block. - DominanceFrontier::DomSetType NewPHSet; - NewPHSet.insert(Exit); - DF->addBasicBlock(NewPreHeader, NewPHSet); - - // New Header's dominance frontier now includes itself and Exit block - DominanceFrontier::iterator HeadI = DF->find(L->getHeader()); - if (HeadI != DF->end()) { - DominanceFrontier::DomSetType & HeaderSet = HeadI->second; - HeaderSet.clear(); - HeaderSet.insert(L->getHeader()); - HeaderSet.insert(Exit); - } else { - DominanceFrontier::DomSetType HeaderSet; - HeaderSet.insert(L->getHeader()); - HeaderSet.insert(Exit); - DF->addBasicBlock(L->getHeader(), HeaderSet); - } - - // Original header (new Loop Latch)'s dominance frontier is Exit. - DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch()); - if (LatchI != DF->end()) { - DominanceFrontier::DomSetType &LatchSet = LatchI->second; - LatchSet = LatchI->second; - LatchSet.clear(); - LatchSet.insert(Exit); - } else { - DominanceFrontier::DomSetType LatchSet; - LatchSet.insert(Exit); - DF->addBasicBlock(L->getHeader(), LatchSet); + // At this point, we've finished our major CFG changes. As part of cloning + // the loop into the preheader we've simplified instructions and the + // duplicated conditional branch may now be branching on a constant. If it is + // branching on a constant and if that constant means that we enter the loop, + // then we fold away the cond branch to an uncond branch. This simplifies the + // loop in cases important for nested loops, and it also means we don't have + // to split as many edges. 
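Whether the cloned branch folds reduces to the check sketched here (helper name hypothetical). Successor 0 is the taken edge for a true condition, so a zero condition selects successor 1, which is what indexing with isZero() expresses:

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
using namespace llvm;

// The preheader's cloned conditional branch folds to an unconditional
// branch only if its condition simplified to a constant that provably
// enters the loop.
static bool foldsIntoLoopEntry(BranchInst *PHBI, BasicBlock *NewHeader) {
  ConstantInt *CI = dyn_cast<ConstantInt>(PHBI->getCondition());
  if (!CI)
    return false; // condition did not simplify to a constant
  return PHBI->getSuccessor(CI->isZero()) == NewHeader;
}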
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); + assert(PHBI->isConditional() && "Should be clone of BI condbr!"); + if (!isa<ConstantInt>(PHBI->getCondition()) || + PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) + != NewHeader) { + // The conditional branch can't be folded, handle the general case. + // Update DominatorTree to reflect the CFG change we just made. Then split + // edges as necessary to preserve LoopSimplify form. + if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { + // Since OrigPreheader now has the conditional branch to Exit block, it is + // the dominator of Exit. + DT->changeImmediateDominator(Exit, OrigPreheader); + DT->changeImmediateDominator(NewHeader, OrigPreheader); + + // Update OrigHeader to be dominated by the new header block. + DT->changeImmediateDominator(OrigHeader, OrigLatch); } - - // If a loop block dominates new loop latch then add to its frontiers - // new header and Exit and remove new latch (which is equal to original - // header). - BasicBlock *NewLatch = L->getLoopLatch(); - - assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader"); - + + // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and + // thus is not a preheader anymore. Split the edge to form a real preheader. + BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this); + NewPH->setName(NewHeader->getName() + ".lr.ph"); + + // Preserve canonical loop form, which means that 'Exit' should have only one + // predecessor. + BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this); + ExitSplit->moveBefore(Exit); + } else { + // We can fold the conditional branch in the preheader, this makes things + // simpler. The first step is to remove the extra edge to the Exit block. + Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); + BranchInst::Create(NewHeader, PHBI); + PHBI->eraseFromParent(); + + // With our CFG finalized, update DomTree if it is available. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { - for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - BasicBlock *B = *BI; - if (DT->dominates(B, NewLatch)) { - DominanceFrontier::iterator BDFI = DF->find(B); - if (BDFI != DF->end()) { - DominanceFrontier::DomSetType &BSet = BDFI->second; - BSet.erase(NewLatch); - BSet.insert(L->getHeader()); - BSet.insert(Exit); - } else { - DominanceFrontier::DomSetType BSet; - BSet.insert(L->getHeader()); - BSet.insert(Exit); - DF->addBasicBlock(B, BSet); - } - } - } + // Update OrigHeader to be dominated by the new header block. + DT->changeImmediateDominator(NewHeader, OrigPreheader); + DT->changeImmediateDominator(OrigHeader, OrigLatch); } } - - // Preserve canonical loop form, which means Exit block should - // have only one predecessor. - SplitEdge(L->getLoopLatch(), Exit, this); - - assert(NewHeader && L->getHeader() == NewHeader && - "Invalid loop header after loop rotation"); - assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader && - "Invalid loop preheader after loop rotation"); - assert(L->getLoopLatch() && - "Invalid loop latch after loop rotation"); + + assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); + assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); + + // Now that the CFG and DomTree are in a consistent state again, try to merge + // the OrigHeader block into OrigLatch. 
This will succeed if they are + // connected by an unconditional branch. This is just a cleanup so the + // emitted code isn't too gross in this common case. + MergeBlockIntoPredecessor(OrigHeader, this); + + ++NumRotated; + return true; } + diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index e8dc5d3..ac4aea2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -63,6 +63,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallBitVector.h" @@ -113,7 +114,7 @@ class RegUseTracker { public: void CountRegister(const SCEV *Reg, size_t LUIdx); void DropRegister(const SCEV *Reg, size_t LUIdx); - void DropUse(size_t LUIdx); + void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -152,11 +153,19 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { } void -RegUseTracker::DropUse(size_t LUIdx) { - // Remove the use index from every register's use list. +RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) { + assert(LUIdx <= LastLUIdx); + + // Update RegUses. The data structure is not optimized for this purpose; + // we must iterate through it and update each of the bit vectors. for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end(); - I != E; ++I) - I->second.UsedByIndices.reset(LUIdx); + I != E; ++I) { + SmallBitVector &UsedByIndices = I->second.UsedByIndices; + if (LUIdx < UsedByIndices.size()) + UsedByIndices[LUIdx] = + LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0; + UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx)); + } } bool @@ -202,8 +211,7 @@ struct Formula { Formula() : ScaledReg(0) {} - void InitialMatch(const SCEV *S, Loop *L, - ScalarEvolution &SE, DominatorTree &DT); + void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); unsigned getNumRegs() const; const Type *getType() const; @@ -224,9 +232,9 @@ struct Formula { static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl<const SCEV *> &Good, SmallVectorImpl<const SCEV *> &Bad, - ScalarEvolution &SE, DominatorTree &DT) { + ScalarEvolution &SE) { // Collect expressions which properly dominate the loop header. - if (S->properlyDominates(L->getHeader(), &DT)) { + if (SE.properlyDominates(S, L->getHeader())) { Good.push_back(S); return; } @@ -235,18 +243,18 @@ static void DoInitialMatch(const SCEV *S, Loop *L, if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - DoInitialMatch(*I, L, Good, Bad, SE, DT); + DoInitialMatch(*I, L, Good, Bad, SE); return; } // Look at addrec operands. 
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) if (!AR->getStart()->isZero()) { - DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); + DoInitialMatch(AR->getStart(), L, Good, Bad, SE); DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), - L, Good, Bad, SE, DT); + L, Good, Bad, SE); return; } @@ -258,7 +266,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L, SmallVector<const SCEV *, 4> MyGood; SmallVector<const SCEV *, 4> MyBad; - DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT); + DoInitialMatch(NewMul, L, MyGood, MyBad, SE); const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( SE.getEffectiveSCEVType(NewMul->getType()))); for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(), @@ -278,11 +286,10 @@ static void DoInitialMatch(const SCEV *S, Loop *L, /// InitialMatch - Incorporate loop-variant parts of S into this Formula, /// attempting to keep all loop-invariant and loop-computable values in a /// single base register. -void Formula::InitialMatch(const SCEV *S, Loop *L, - ScalarEvolution &SE, DominatorTree &DT) { +void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { SmallVector<const SCEV *, 4> Good; SmallVector<const SCEV *, 4> Bad; - DoInitialMatch(S, L, Good, Bad, SE, DT); + DoInitialMatch(S, L, Good, Bad, SE); if (!Good.empty()) { const SCEV *Sum = SE.getAddExpr(Good); if (!Sum->isZero()) @@ -608,7 +615,7 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); + Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()); if (I == 0 || !isInstructionTriviallyDead(I)) continue; @@ -645,8 +652,6 @@ public: : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), SetupCost(0) {} - unsigned getNumRegs() const { return NumRegs; } - bool operator<(const Cost &Other) const; void Loose(); @@ -722,6 +727,9 @@ void Cost::RateRegister(const SCEV *Reg, (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) ++SetupCost; + + NumIVMuls += isa<SCEVMulExpr>(Reg) && + SE.hasComputableLoopEvolution(Reg, L); } /// RatePrimaryRegister - Record this register in the set. If we haven't seen it @@ -756,9 +764,6 @@ void Cost::RateFormula(const Formula &F, return; } RatePrimaryRegister(BaseReg, Regs, L, SE, DT); - - NumIVMuls += isa<SCEVMulExpr>(BaseReg) && - BaseReg->hasComputableLoopEvolution(L); } if (F.BaseRegs.size() > 1) @@ -1257,32 +1262,6 @@ struct UseMapDenseMapInfo { } }; -/// FormulaSorter - This class implements an ordering for formulae which sorts -/// the by their standalone cost. -class FormulaSorter { - /// These two sets are kept empty, so that we compute standalone costs. - DenseSet<const SCEV *> VisitedRegs; - SmallPtrSet<const SCEV *, 16> Regs; - Loop *L; - LSRUse *LU; - ScalarEvolution &SE; - DominatorTree &DT; - -public: - FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt) - : L(l), LU(&lu), SE(se), DT(dt) {} - - bool operator()(const Formula &A, const Formula &B) { - Cost CostA; - CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT); - Regs.clear(); - Cost CostB; - CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT); - Regs.clear(); - return CostA < CostB; - } -}; - /// LSRInstance - This class holds state for the main loop strength reduction /// logic. 
class LSRInstance {
@@ -1341,7 +1320,7 @@ class LSRInstance {
                                 LSRUse::KindType Kind,
                                 const Type *AccessTy);
 
-  void DeleteUse(LSRUse &LU);
+  void DeleteUse(LSRUse &LU, size_t LUIdx);
 
   LSRUse *FindUseWithSimilarFormula(const Formula &F,
                                     const LSRUse &OrigLU);
@@ -1925,10 +1904,13 @@ LSRInstance::getUse(const SCEV *&Expr,
 }
 
 /// DeleteUse - Delete the given use from the Uses list.
-void LSRInstance::DeleteUse(LSRUse &LU) {
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
   if (&LU != &Uses.back())
     std::swap(LU, Uses.back());
   Uses.pop_back();
+
+  // Update RegUses.
+  RegUses.SwapAndDropUse(LUIdx, Uses.size());
 }
 
 /// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
@@ -2073,7 +2055,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
         // x == y  -->  x - y == 0
         const SCEV *N = SE.getSCEV(NV);
-        if (N->isLoopInvariant(L)) {
+        if (SE.isLoopInvariant(N, L)) {
           Kind = LSRUse::ICmpZero;
           S = SE.getMinusSCEV(N, S);
         }
@@ -2113,7 +2095,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
 void
 LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
   Formula F;
-  F.InitialMatch(S, L, SE, DT);
+  F.InitialMatch(S, L, SE);
   bool Inserted = InsertFormula(LU, LUIdx, F);
   assert(Inserted && "Initial formula already exists!"); (void)Inserted;
 }
@@ -2213,7 +2195,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
         if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
           unsigned OtherIdx = !UI.getOperandNo();
           Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
-          if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
+          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
             continue;
         }
@@ -2296,7 +2278,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
     // Loop-variant "unknown" values are uninteresting; we won't be able to
     // do anything meaningful with them.
-    if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+    if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
       continue;
 
     // Don't pull a constant into a register if the constant could be folded
@@ -2347,8 +2329,8 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
   for (SmallVectorImpl<const SCEV *>::const_iterator
        I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
     const SCEV *BaseReg = *I;
-    if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
-        !BaseReg->hasComputableLoopEvolution(L))
+    if (SE.properlyDominates(BaseReg, L->getHeader()) &&
+        !SE.hasComputableLoopEvolution(BaseReg, L))
       Ops.push_back(BaseReg);
     else
       F.BaseRegs.push_back(BaseReg);
@@ -2813,9 +2795,11 @@ LSRInstance::GenerateAllReuseFormulae() {
         print_uses(dbgs()));
 }
 
-/// If their are multiple formulae with the same set of registers used
+/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
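DeleteUse and SwapAndDropUse together implement a swap-with-last-and-pop erase: the last use moves into the deleted slot, so every per-register bitmap must move that use's bit the same way and then shrink. The same bookkeeping on plain standard-library types, as a sketch (names are illustrative, not LLVM's); the FilterOutUndesirableDedicatedRegisters definition the comment above documents continues below.

#include <vector>
#include <cassert>

// Each register tracks which use indices reference it.
typedef std::vector<bool> UsedByIndices;

// Sketch: erase use DeadIdx by moving the last use into its slot, then fix up
// every bitmap the way RegUseTracker::SwapAndDropUse does.
void eraseUse(std::vector<int> &Uses, std::vector<UsedByIndices> &Bitmaps,
              size_t DeadIdx) {
  size_t LastIdx = Uses.size() - 1;
  assert(DeadIdx <= LastIdx);

  // Swap-with-back erase of the use itself: O(1), but it renumbers LastIdx.
  std::swap(Uses[DeadIdx], Uses[LastIdx]);
  Uses.pop_back();

  // Mirror the renumbering in every bitmap: the bit for LastIdx moves into
  // DeadIdx's slot, and the vector shrinks so LastIdx no longer exists.
  for (size_t r = 0, e = Bitmaps.size(); r != e; ++r) {
    UsedByIndices &B = Bitmaps[r];
    if (DeadIdx < B.size())
      B[DeadIdx] = LastIdx < B.size() ? B[LastIdx] : false;
    if (B.size() > LastIdx)
      B.resize(LastIdx);
  }
}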
void LSRInstance::FilterOutUndesirableDedicatedRegisters() { + DenseSet<const SCEV *> VisitedRegs; + SmallPtrSet<const SCEV *, 16> Regs; #ifndef NDEBUG bool ChangedFormulae = false; #endif @@ -2828,7 +2812,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; - FormulaSorter Sorter(L, LU, SE, DT); DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); bool Any = false; @@ -2854,7 +2837,14 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { BestFormulae.insert(std::make_pair(Key, FIdx)); if (!P.second) { Formula &Best = LU.Formulae[P.first->second]; - if (Sorter.operator()(F, Best)) + + Cost CostF; + CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT); + Regs.clear(); + Cost CostBest; + CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); + Regs.clear(); + if (CostF < CostBest) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" @@ -2894,7 +2884,7 @@ static const size_t ComplexityLimit = UINT16_MAX; /// this many solutions because it prune the search space, but the pruning /// isn't always sufficient. size_t LSRInstance::EstimateSearchSpaceComplexity() const { - uint32_t Power = 1; + size_t Power = 1; for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end(); I != E; ++I) { size_t FSize = I->Formulae.size(); @@ -3001,6 +2991,28 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + // Update the relocs to reference the new use. + for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.AM.BaseOffs; + // Add the new offset to LUThatHas' offset list. + if (LUThatHas->Offsets.back() != Fixup.Offset) { + LUThatHas->Offsets.push_back(Fixup.Offset); + if (Fixup.Offset > LUThatHas->MaxOffset) + LUThatHas->MaxOffset = Fixup.Offset; + if (Fixup.Offset < LUThatHas->MinOffset) + LUThatHas->MinOffset = Fixup.Offset; + } + DEBUG(dbgs() << "New fixup has offset " + << Fixup.Offset << '\n'); + } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; + } + // Delete formulae from the new use which are no longer legal. bool Any = false; for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { @@ -3019,22 +3031,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (Any) LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); - // Update the relocs to reference the new use. - for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.AM.BaseOffs; - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } - // Delete the old use. - DeleteUse(LU); + DeleteUse(LU, LUIdx); --LUIdx; --NumUses; break; @@ -3546,21 +3544,23 @@ void LSRInstance::RewriteForPHI(PHINode *PN, // is the canonical backedge for this loop, which complicates post-inc // users. if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(BB->getTerminator()) && - (PN->getParent() != L->getHeader() || !L->contains(BB))) { - // Split the critical edge. 
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P); - - // If PN is outside of the loop and BB is in the loop, we want to - // move the block to be immediately before the PHI block, not - // immediately after BB. - if (L->contains(BB) && !L->contains(PN)) - NewBB->moveBefore(PN->getParent()); - - // Splitting the edge can reduce the number of PHI entries we have. - e = PN->getNumIncomingValues(); - BB = NewBB; - i = PN->getBasicBlockIndex(BB); + !isa<IndirectBrInst>(BB->getTerminator())) { + Loop *PNLoop = LI.getLoopFor(PN->getParent()); + if (!PNLoop || PN->getParent() != PNLoop->getHeader()) { + // Split the critical edge. + BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P); + + // If PN is outside of the loop and BB is in the loop, we want to + // move the block to be immediately before the PHI block, not + // immediately after BB. + if (L->contains(BB) && !L->contains(PN)) + NewBB->moveBefore(PN->getParent()); + + // Splitting the edge can reduce the number of PHI entries we have. + e = PN->getNumIncomingValues(); + BB = NewBB; + i = PN->getBasicBlockIndex(BB); + } } std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair = @@ -3792,21 +3792,30 @@ private: } char LoopStrengthReduce::ID = 0; -INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce", - "Loop Strength Reduction", false, false); +INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(IVUsers) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false) + Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(ID), TLI(tli) {} + : LoopPass(ID), TLI(tli) { + initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); + } void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("domfrontier"); AU.addRequired<LoopInfo>(); AU.addPreserved<LoopInfo>(); @@ -3815,6 +3824,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); + // Requiring LoopSimplify a second time here prevents IVUsers from running + // twice, since LoopSimplify was invalidated by running ScalarEvolution. 
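The registration hunks throughout this diff follow one recipe: the single INITIALIZE_PASS macro becomes an INITIALIZE_PASS_BEGIN/INITIALIZE_PASS_DEPENDENCY/INITIALIZE_PASS_END group, and the constructor calls the generated initializer. A hedged template of the pattern; MyPass and its single dependency are placeholders, and in-tree code would declare the initializer in llvm/InitializePasses.h rather than locally:

#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

namespace llvm {
  // In-tree this declaration lives in llvm/InitializePasses.h.
  void initializeMyPassPass(PassRegistry &);
}

namespace {
  struct MyPass : public FunctionPass {
    static char ID;
    MyPass() : FunctionPass(ID) {
      // Registers this pass, and transitively its dependencies, even when
      // the object is constructed directly instead of through the registry.
      initializeMyPassPass(*PassRegistry::getPassRegistry());
    }
    virtual bool runOnFunction(Function &) { return false; }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LoopInfo>();
    }
  };
}

char MyPass::ID = 0;
// The BEGIN/END pair brackets the dependency list so each required analysis
// gets initialized before the pass itself.
INITIALIZE_PASS_BEGIN(MyPass, "mypass", "Example pass", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_END(MyPass, "mypass", "Example pass", false, false)

The AU.addRequiredID(LoopSimplifyID) line the diff resumes with below is the requirement the comment above introduces.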
+  AU.addRequiredID(LoopSimplifyID);
   AU.addRequired<IVUsers>();
   AU.addPreserved<IVUsers>();
 }
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d0edfa2..80b263a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -16,7 +16,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -27,7 +27,7 @@
 using namespace llvm;
 
 static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
   cl::desc("The cut-off point for automatic loop unrolling"));
 
 static cl::opt<unsigned>
@@ -43,12 +43,20 @@ namespace {
   class LoopUnroll : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopUnroll() : LoopPass(ID) {}
+    LoopUnroll() : LoopPass(ID) {
+      initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+    }
 
     /// A magic value for use with the Threshold parameter to indicate
     /// that the loop unroll should be performed regardless of how much
     /// code expansion would result.
     static const unsigned NoThreshold = UINT_MAX;
+
+    // Threshold to use when optsize is specified (and there is no
+    // explicit -unroll-threshold).
+    static const unsigned OptSizeUnrollThreshold = 50;
+
+    unsigned CurrentThreshold;
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -73,7 +81,11 @@ namespace {
 }
 
 char LoopUnroll::ID = 0;
-INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
 
 Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
 
@@ -83,8 +95,16 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
        I != E; ++I)
     Metrics.analyzeBasicBlock(*I);
-  NumCalls = Metrics.NumCalls;
-  return Metrics.NumInsts;
+  NumCalls = Metrics.NumInlineCandidates;
+
+  unsigned LoopSize = Metrics.NumInsts;
+
+  // Don't allow an estimate of size zero.  This would allow unrolling of loops
+  // with huge iteration counts, which is a compile-time problem even if it's
+  // not a problem for code quality.
+  if (LoopSize == 0) LoopSize = 1;
+
+  return LoopSize;
 }
 
 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -94,6 +114,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
         << "] Loop %" << Header->getName() << "\n");
   (void)Header;
+
+  // Determine the current unrolling threshold.  While this is normally set
+  // from UnrollThreshold, it is overridden to a smaller value if the current
+  // function is marked as optimize-for-size, and the unroll threshold was
+  // not user specified.
+ CurrentThreshold = UnrollThreshold; + if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) && + UnrollThreshold.getNumOccurrences() == 0) + CurrentThreshold = OptSizeUnrollThreshold; // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); @@ -111,25 +140,25 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Enforce the threshold. - if (UnrollThreshold != NoThreshold) { - unsigned NumCalls; - unsigned LoopSize = ApproximateLoopSize(L, NumCalls); + if (CurrentThreshold != NoThreshold) { + unsigned NumInlineCandidates; + unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); - if (NumCalls != 0) { - DEBUG(dbgs() << " Not unrolling loop with function calls.\n"); + if (NumInlineCandidates != 0) { + DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > UnrollThreshold) { + if (TripCount != 1 && Size > CurrentThreshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << UnrollThreshold << "\n"); + << " because size: " << Size << ">" << CurrentThreshold << "\n"); if (!UnrollAllowPartial) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = UnrollThreshold / LoopSize; + Count = CurrentThreshold / LoopSize; while (Count != 0 && TripCount%Count != 0) { Count--; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 9afe428..b4e3d31 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -32,12 +32,12 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -93,7 +93,9 @@ namespace { explicit LoopUnswitch(bool Os = false) : LoopPass(ID), OptimizeForSize(Os), redoLoop(false), currentLoop(NULL), DT(NULL), loopHeader(NULL), - loopPreheader(NULL) {} + loopPreheader(NULL) { + initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); + } bool runOnLoop(Loop *L, LPPassManager &LPM); bool processCurrentLoop(); @@ -109,6 +111,7 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<DominatorTree>(); + AU.addPreserved<ScalarEvolution>(); } private: @@ -158,7 +161,13 @@ namespace { }; } char LoopUnswitch::ID = 0; -INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false); +INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", + false, false) Pass *llvm::createLoopUnswitchPass(bool Os) { return new LoopUnswitch(Os); @@ -450,22 +459,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { return true; } -// RemapInstruction - 
Convert the instruction operands from referencing the -// current values into those specified by VMap. -// -static inline void RemapInstruction(Instruction *I, - ValueMap<const Value *, Value*> &VMap) { - for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { - Value *Op = I->getOperand(op); - ValueMap<const Value *, Value*>::iterator It = VMap.find(Op); - if (It != VMap.end()) Op = It->second; - I->setOperand(op, Op); - } -} - /// CloneLoop - Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. -static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM, +static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM) { Loop *New = new Loop(); LPM->insertLoop(New, PL); @@ -580,6 +576,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, << " blocks] in Function " << F->getName() << " when '" << *Val << "' == " << *LIC << "\n"); + if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) + SE->forgetLoop(L); + LoopBlocks.clear(); NewBlocks.clear(); @@ -609,7 +608,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // the loop preheader and exit blocks), keeping track of the mapping between // the instructions and blocks. NewBlocks.reserve(LoopBlocks.size()); - ValueMap<const Value*, Value*> VMap; + ValueToValueMapTy VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); NewBlocks.push_back(NewBB); @@ -647,7 +646,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) { PN = cast<PHINode>(I); Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); - ValueMap<const Value *, Value*>::iterator It = VMap.find(V); + ValueToValueMapTy::iterator It = VMap.find(V); if (It != VMap.end()) V = It->second; PN->addIncoming(V, NewExit); } @@ -657,7 +656,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - RemapInstruction(I, VMap); + RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); // Rewrite the original preheader to select between versions of the loop. BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); @@ -961,13 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { while (!Worklist.empty()) { Instruction *I = Worklist.back(); Worklist.pop_back(); - - // Simple constant folding. - if (Constant *C = ConstantFoldInstruction(I)) { - ReplaceUsesOfWith(I, C, Worklist, L, LPM); - continue; - } - + // Simple DCE. if (isInstructionTriviallyDead(I)) { DEBUG(dbgs() << "Remove dead instruction '" << *I); @@ -982,15 +975,16 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { ++NumSimplify; continue; } - + // See if instruction simplification can hack this up. This is common for // things like "select false, X, Y" after unswitching made the condition be // 'false'. - if (Value *V = SimplifyInstruction(I)) { - ReplaceUsesOfWith(I, V, Worklist, L, LPM); - continue; - } - + if (Value *V = SimplifyInstruction(I, 0, DT)) + if (LI->replacementPreservesLCSSAForm(I, V)) { + ReplaceUsesOfWith(I, V, Worklist, L, LPM); + continue; + } + // Special case hacks that appear commonly in unswitched code. 
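The LoopUnswitch hunks above drop the file-local RemapInstruction in favor of the shared utility, passing RF_IgnoreMissingEntries because operands defined outside the cloned region legitimately have no VMap entry. A minimal sketch of the clone-then-remap sequence those hunks rely on (the helper name is illustrative):

#include "llvm/BasicBlock.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Sketch: clone BB and rewrite the clone's operands to refer to the cloned
// values.  Operands with no VMap entry (defined outside BB) are left alone.
static BasicBlock *cloneAndRemap(BasicBlock *BB) {
  ValueToValueMapTy VMap;
  BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".copy", BB->getParent());

  for (BasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); I != E; ++I)
    RemapInstruction(I, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
  return NewBB;
}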
if (BranchInst *BI = dyn_cast<BranchInst>(I)) { if (BI->isUnconditional()) { diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp index 973ffe7..9087b46 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp @@ -14,26 +14,15 @@ #define DEBUG_TYPE "loweratomic" #include "llvm/Transforms/Scalar.h" -#include "llvm/BasicBlock.h" #include "llvm/Function.h" -#include "llvm/Instruction.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/IRBuilder.h" - using namespace llvm; -namespace { - -bool LowerAtomicIntrinsic(CallInst *CI) { - IRBuilder<> Builder(CI->getParent(), CI); - - Function *Callee = CI->getCalledFunction(); - if (!Callee) - return false; - - unsigned IID = Callee->getIntrinsicID(); +static bool LowerAtomicIntrinsic(IntrinsicInst *II) { + IRBuilder<> Builder(II->getParent(), II); + unsigned IID = II->getIntrinsicID(); switch (IID) { case Intrinsic::memory_barrier: break; @@ -48,80 +37,70 @@ bool LowerAtomicIntrinsic(CallInst *CI) { case Intrinsic::atomic_load_min: case Intrinsic::atomic_load_umax: case Intrinsic::atomic_load_umin: { - Value *Ptr = CI->getArgOperand(0); - Value *Delta = CI->getArgOperand(1); + Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1); LoadInst *Orig = Builder.CreateLoad(Ptr); Value *Res = NULL; switch (IID) { - default: assert(0 && "Unrecognized atomic modify operation"); - case Intrinsic::atomic_load_add: - Res = Builder.CreateAdd(Orig, Delta); - break; - case Intrinsic::atomic_load_sub: - Res = Builder.CreateSub(Orig, Delta); - break; - case Intrinsic::atomic_load_and: - Res = Builder.CreateAnd(Orig, Delta); - break; - case Intrinsic::atomic_load_nand: - Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta)); - break; - case Intrinsic::atomic_load_or: - Res = Builder.CreateOr(Orig, Delta); - break; - case Intrinsic::atomic_load_xor: - Res = Builder.CreateXor(Orig, Delta); - break; - case Intrinsic::atomic_load_max: - Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), - Delta, - Orig); - break; - case Intrinsic::atomic_load_min: - Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), - Orig, - Delta); - break; - case Intrinsic::atomic_load_umax: - Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), - Delta, - Orig); - break; - case Intrinsic::atomic_load_umin: - Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), - Orig, - Delta); - break; + default: assert(0 && "Unrecognized atomic modify operation"); + case Intrinsic::atomic_load_add: + Res = Builder.CreateAdd(Orig, Delta); + break; + case Intrinsic::atomic_load_sub: + Res = Builder.CreateSub(Orig, Delta); + break; + case Intrinsic::atomic_load_and: + Res = Builder.CreateAnd(Orig, Delta); + break; + case Intrinsic::atomic_load_nand: + Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta)); + break; + case Intrinsic::atomic_load_or: + Res = Builder.CreateOr(Orig, Delta); + break; + case Intrinsic::atomic_load_xor: + Res = Builder.CreateXor(Orig, Delta); + break; + case Intrinsic::atomic_load_max: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Delta, Orig); + break; + case Intrinsic::atomic_load_min: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Orig, Delta); + break; + case Intrinsic::atomic_load_umax: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Delta, Orig); + break; 
+ case Intrinsic::atomic_load_umin: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Orig, Delta); + break; } Builder.CreateStore(Res, Ptr); - CI->replaceAllUsesWith(Orig); + II->replaceAllUsesWith(Orig); break; } case Intrinsic::atomic_swap: { - Value *Ptr = CI->getArgOperand(0); - Value *Val = CI->getArgOperand(1); - + Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1); LoadInst *Orig = Builder.CreateLoad(Ptr); Builder.CreateStore(Val, Ptr); - - CI->replaceAllUsesWith(Orig); + II->replaceAllUsesWith(Orig); break; } case Intrinsic::atomic_cmp_swap: { - Value *Ptr = CI->getArgOperand(0); - Value *Cmp = CI->getArgOperand(1); - Value *Val = CI->getArgOperand(2); + Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1); + Value *Val = II->getArgOperand(2); LoadInst *Orig = Builder.CreateLoad(Ptr); Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); Value *Res = Builder.CreateSelect(Equal, Val, Orig); Builder.CreateStore(Res, Ptr); - - CI->replaceAllUsesWith(Orig); + II->replaceAllUsesWith(Orig); break; } @@ -129,33 +108,32 @@ bool LowerAtomicIntrinsic(CallInst *CI) { return false; } - assert(CI->use_empty() && + assert(II->use_empty() && "Lowering should have eliminated any uses of the intrinsic call!"); - CI->eraseFromParent(); + II->eraseFromParent(); return true; } -struct LowerAtomic : public BasicBlockPass { - static char ID; - LowerAtomic() : BasicBlockPass(ID) {} - bool runOnBasicBlock(BasicBlock &BB) { - bool Changed = false; - for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { - Instruction *Inst = DI++; - if (CallInst *CI = dyn_cast<CallInst>(Inst)) - Changed |= LowerAtomicIntrinsic(CI); +namespace { + struct LowerAtomic : public BasicBlockPass { + static char ID; + LowerAtomic() : BasicBlockPass(ID) { + initializeLowerAtomicPass(*PassRegistry::getPassRegistry()); } - return Changed; - } - -}; - + bool runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++)) + Changed |= LowerAtomicIntrinsic(II); + return Changed; + } + }; } char LowerAtomic::ID = 0; INITIALIZE_PASS(LowerAtomic, "loweratomic", "Lower atomic intrinsics to non-atomic form", - false, false); + false, false) Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 24fae42..bde0e53 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -14,16 +14,18 @@ #define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" +#include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include <list> @@ -32,62 +34,10 @@ using namespace llvm; STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); - -/// 
isBytewiseValue - If the specified value can be set by repeating the same -/// byte in memory, return the i8 value that it is represented with. This is -/// true for all i8 values obviously, but is also true for i32 0, i32 -1, -/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated -/// byte store (e.g. i16 0x1234), return null. -static Value *isBytewiseValue(Value *V) { - LLVMContext &Context = V->getContext(); - - // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType()->isIntegerTy(8)) return V; - - // Constant float and double values can be handled as integer values if the - // corresponding integer value is "byteable". An important case is 0.0. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { - if (CFP->getType()->isFloatTy()) - V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context)); - if (CFP->getType()->isDoubleTy()) - V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context)); - // Don't handle long double formats, which have strange constraints. - } - - // We can handle constant integers that are power of two in size and a - // multiple of 8 bits. - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - unsigned Width = CI->getBitWidth(); - if (isPowerOf2_32(Width) && Width > 8) { - // We can handle this value if the recursive binary decomposition is the - // same at all levels. - APInt Val = CI->getValue(); - APInt Val2; - while (Val.getBitWidth() != 8) { - unsigned NextWidth = Val.getBitWidth()/2; - Val2 = Val.lshr(NextWidth); - Val2.trunc(Val.getBitWidth()/2); - Val.trunc(Val.getBitWidth()/2); - - // If the top/bottom halves aren't the same, reject it. - if (Val != Val2) - return 0; - } - return ConstantInt::get(Context, Val); - } - } - - // Conceptually, we could handle things like: - // %a = zext i8 %X to i16 - // %b = shl i16 %a, 8 - // %c = or i16 %a, %b - // but until there is an example that actually needs this, it doesn't seem - // worth worrying about. - return 0; -} +STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, - bool &VariableIdxFound, TargetData &TD) { + bool &VariableIdxFound, const TargetData &TD){ // Skip over the first indices. gep_type_iterator GTI = gep_type_begin(GEP); for (unsigned i = 1; i != Idx; ++i, ++GTI) @@ -120,14 +70,31 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, /// constant offset, and return that constant offset. For example, Ptr1 might /// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, - TargetData &TD) { + const TargetData &TD) { + Ptr1 = Ptr1->stripPointerCasts(); + Ptr2 = Ptr2->stripPointerCasts(); + GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1); + GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2); + + bool VariableIdxFound = false; + + // If one pointer is a GEP and the other isn't, then see if the GEP is a + // constant offset from the base, as in "P" and "gep P, 1". + if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { + Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); + return !VariableIdxFound; + } + + if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { + Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); + return !VariableIdxFound; + } + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical // base. 
After that base, they may have some number of common (and // potentially variable) indices. After that they handle some constant // offset, which determines their offset from each other. At this point, we // handle no other case. - GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1); - GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2); if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) return false; @@ -137,7 +104,6 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) break; - bool VariableIdxFound = false; int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); if (VariableIdxFound) return false; @@ -171,7 +137,7 @@ struct MemsetRange { unsigned Alignment; /// TheStores - The actual stores that make up this range. - SmallVector<StoreInst*, 16> TheStores; + SmallVector<Instruction*, 16> TheStores; bool isProfitableToUseMemset(const TargetData &TD) const; @@ -181,10 +147,19 @@ struct MemsetRange { bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { // If we found more than 8 stores to merge or 64 bytes, use memset. if (TheStores.size() >= 8 || End-Start >= 64) return true; + + // If there is nothing to merge, don't do anything. + if (TheStores.size() < 2) return false; + + // If any of the stores are a memset, then it is always good to extend the + // memset. + for (unsigned i = 0, e = TheStores.size(); i != e; ++i) + if (!isa<StoreInst>(TheStores[i])) + return true; // Assume that the code generator is capable of merging pairs of stores // together if it wants to. - if (TheStores.size() <= 2) return false; + if (TheStores.size() == 2) return false; // If we have fewer than 8 stores, it can still be worthwhile to do this. // For example, merging 4 i8 stores into an i32 store is useful almost always. @@ -215,31 +190,53 @@ class MemsetRanges { /// because each element is relatively large and expensive to copy. std::list<MemsetRange> Ranges; typedef std::list<MemsetRange>::iterator range_iterator; - TargetData &TD; + const TargetData &TD; public: - MemsetRanges(TargetData &td) : TD(td) {} + MemsetRanges(const TargetData &td) : TD(td) {} typedef std::list<MemsetRange>::const_iterator const_iterator; const_iterator begin() const { return Ranges.begin(); } const_iterator end() const { return Ranges.end(); } bool empty() const { return Ranges.empty(); } - void addStore(int64_t OffsetFromFirst, StoreInst *SI); + void addInst(int64_t OffsetFromFirst, Instruction *Inst) { + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + addStore(OffsetFromFirst, SI); + else + addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst)); + } + + void addStore(int64_t OffsetFromFirst, StoreInst *SI) { + int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); + + addRange(OffsetFromFirst, StoreSize, + SI->getPointerOperand(), SI->getAlignment(), SI); + } + + void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { + int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); + addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); + } + + void addRange(int64_t Start, int64_t Size, Value *Ptr, + unsigned Alignment, Instruction *Inst); + }; } // end anon namespace -/// addStore - Add a new store to the MemsetRanges data structure. This adds a +/// addRange - Add a new store to the MemsetRanges data structure. 
This adds a
+/// new range for the specified store at the specified offset, merging into
+/// existing ranges as appropriate.
-void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
-  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
-  // Do a linear search of the ranges to see if this can be joined and/or to
-  // find the insertion point in the list.  We keep the ranges sorted for
-  // simplicity here.  This is a linear search of a linked list, which is ugly,
-  // however the number of ranges is limited, so this won't get crazy slow.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list.  We keep the ranges sorted for
+/// simplicity here.  This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+                            unsigned Alignment, Instruction *Inst) {
+  int64_t End = Start+Size;
   range_iterator I = Ranges.begin(), E = Ranges.end();
   while (I != E && Start > I->End)
@@ -252,14 +249,14 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
     MemsetRange &R = *Ranges.insert(I, MemsetRange());
     R.Start        = Start;
     R.End          = End;
-    R.StartPtr     = SI->getPointerOperand();
-    R.Alignment    = SI->getAlignment();
-    R.TheStores.push_back(SI);
+    R.StartPtr     = Ptr;
+    R.Alignment    = Alignment;
+    R.TheStores.push_back(Inst);
     return;
   }
-
+
   // This store overlaps with I, add it.
-  I->TheStores.push_back(SI);
+  I->TheStores.push_back(Inst);
 
   // At this point, we may have an interval that completely contains our store.
   // If so, just add it to the interval and return.
@@ -274,8 +271,8 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
   // stopped on *it*.
   if (Start < I->Start) {
     I->Start = Start;
-    I->StartPtr = SI->getPointerOperand();
-    I->Alignment = SI->getAlignment();
+    I->StartPtr = Ptr;
+    I->Alignment = Alignment;
   }
 
   // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@@ -301,10 +298,16 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
 namespace {
   class MemCpyOpt : public FunctionPass {
-    bool runOnFunction(Function &F);
+    MemoryDependenceAnalysis *MD;
+    const TargetData *TD;
   public:
     static char ID; // Pass identification, replacement for typeid
-    MemCpyOpt() : FunctionPass(ID) {}
+    MemCpyOpt() : FunctionPass(ID) {
+      initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+      MD = 0;
+    }
+
+    bool runOnFunction(Function &F);
 
   private:
     // This transformation requires dominator and postdominator info
@@ -319,9 +322,17 @@ namespace {
     // Helper functions
     bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+    bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
     bool processMemCpy(MemCpyInst *M);
     bool processMemMove(MemMoveInst *M);
-    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+    bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+                              uint64_t cpyLen, CallInst *C);
+    bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+                                       uint64_t MSize);
+    bool processByValArgument(CallSite CS, unsigned ArgNo);
+    Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
+                                      Value *ByteVal);
+
     bool iterateOnFunction(Function &F);
   };
 
@@ -331,165 +342,199 @@ namespace {
 
// createMemCpyOptPass - The public interface to this file...
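MemsetRanges::addRange, whose definition appears above, keeps the range list sorted and merges neighbors on insertion. The interval-merging step can be sketched independently on standard containers (toy types, not the LLVM ones):

#include <list>

struct Range { long Start, End; };          // half-open [Start, End)

// Sketch: insert [Start, End) into a sorted list, merging any ranges that
// touch or overlap it, mirroring the structure of MemsetRanges::addRange.
void addRange(std::list<Range> &Ranges, long Start, long End) {
  std::list<Range>::iterator I = Ranges.begin(), E = Ranges.end();
  while (I != E && Start > I->End)
    ++I;                                    // skip ranges entirely before us

  if (I == E || End < I->Start) {           // no overlap: plain insertion
    Range R; R.Start = Start; R.End = End;
    Ranges.insert(I, R);
    return;
  }

  // Grow the overlapping range to cover the new interval...
  if (Start < I->Start) I->Start = Start;
  if (End > I->End) {
    I->End = End;
    // ...and swallow any later ranges the extended interval now reaches.
    std::list<Range>::iterator Next = I; ++Next;
    while (Next != E && End >= Next->Start) {
      if (Next->End > I->End) I->End = Next->End;
      Next = Ranges.erase(Next);
    }
  }
}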
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } -INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false); - - +INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", + false, false) -/// processStore - When GVN is scanning forward over instructions, we look for +/// tryMergingIntoMemset - When scanning forward over instructions, we look for /// some other patterns to fold away. In particular, this looks for stores to -/// neighboring locations of memory. If it sees enough consequtive ones -/// (currently 4) it attempts to merge them together into a memcpy/memset. -bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { - if (SI->isVolatile()) return false; - - LLVMContext &Context = SI->getContext(); - - // There are two cases that are interesting for this code to handle: memcpy - // and memset. Right now we only handle memset. +/// neighboring locations of memory. If it sees enough consecutive ones, it +/// attempts to merge them together into a memcpy/memset. +Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, + Value *StartPtr, Value *ByteVal) { + if (TD == 0) return 0; - // Ensure that the value being stored is something that can be memset'able a - // byte at a time like "0" or "-1" or any width, as well as things like - // 0xA0A0A0A0 and 0.0. - Value *ByteVal = isBytewiseValue(SI->getOperand(0)); - if (!ByteVal) - return false; - - TargetData *TD = getAnalysisIfAvailable<TargetData>(); - if (!TD) return false; - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - Module *M = SI->getParent()->getParent()->getParent(); - // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemsetRanges Ranges(*TD); - Value *StartPtr = SI->getPointerOperand(); - - BasicBlock::iterator BI = SI; + BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { - if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { - // If the call is readnone, ignore it, otherwise bail out. We don't even - // allow readonly here because we don't want something like: + if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { + // If the instruction is readnone, ignore it, otherwise bail out. We + // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). - if (AA.getModRefBehavior(CallSite(BI)) == - AliasAnalysis::DoesNotAccessMemory) - continue; - - // TODO: If this is a memset, try to join it in. - - break; - } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI)) - break; - - // If this is a non-store instruction it is fine, ignore it. - StoreInst *NextStore = dyn_cast<StoreInst>(BI); - if (NextStore == 0) continue; + if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) + break; + continue; + } - // If this is a store, see if we can merge it in. - if (NextStore->isVolatile()) break; + if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { + // If this is a store, see if we can merge it in. + if (NextStore->isVolatile()) break; - // Check to see if this stored value is of the same byte-splattable value. 
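The byte-splattable test these scans depend on, isBytewiseValue (now hosted in ValueTracking), asks whether a constant is one byte repeated, e.g. i32 0 or 0xA0A0A0A0, so a run of stores can become a memset. The recursive-halving idea behind it, sketched on a plain integer; the real analysis works on APInt and also handles bitcast FP constants:

#include <stdint.h>

// Sketch: return true and set Byte if Val, viewed as Width bits (a power of
// two, at least 8), is the same byte repeated.  Halve the width and compare
// the two halves until only one byte remains.
bool isByteSplat(uint64_t Val, unsigned Width, uint8_t &Byte) {
  if (Width < 8 || Width > 64 || (Width & (Width - 1)) != 0)
    return false;
  while (Width != 8) {
    unsigned Half = Width / 2;
    uint64_t Mask = (1ULL << Half) - 1;
    uint64_t Lo = Val & Mask;
    uint64_t Hi = (Val >> Half) & Mask;
    if (Lo != Hi)
      return false;  // top and bottom halves differ: not a splat
    Val = Lo;
    Width = Half;
  }
  Byte = (uint8_t)Val;
  return true;
}

For example, 0xA0A0A0A0 over 32 bits halves to 0xA0A0, then to 0xA0, so it splats; 0x1234 over 16 bits fails at the first comparison.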
-      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
-        break;
-
-      // Check to see if this store is to a constant offset from the start ptr.
-      int64_t Offset;
-      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
-        break;
-
-      Ranges.addStore(Offset, NextStore);
+      // Check to see if this stored value is of the same byte-splattable value.
+      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+        break;
+
+      // Check to see if this store is to a constant offset from the start ptr.
+      int64_t Offset;
+      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
+                           Offset, *TD))
+        break;
+
+      Ranges.addStore(Offset, NextStore);
+    } else {
+      MemSetInst *MSI = cast<MemSetInst>(BI);
+
+      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+          !isa<ConstantInt>(MSI->getLength()))
+        break;
+
+      // Check to see if this store is to a constant offset from the start ptr.
+      int64_t Offset;
+      if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+        break;
+
+      Ranges.addMemSet(Offset, MSI);
+    }
   }
-
+
   // If we have no ranges, then we just had a single store with nothing that
   // could be merged in.  This is a very common case of course.
   if (Ranges.empty())
-    return false;
+    return 0;
 
   // If we had at least one store that could be merged in, add the starting
   // store as well.  We try to avoid this unless there is at least something
   // interesting as a small compile-time optimization.
-  Ranges.addStore(0, SI);
-
-
+  Ranges.addInst(0, StartInst);
+
+  // If we create any memsets, we put them right before the first instruction
+  // that isn't part of the memset block.  This ensures that the memset is
+  // dominated by any addressing instruction needed by the start of the block.
+  IRBuilder<> Builder(BI);
+
   // Now that we have full information about ranges, loop over the ranges and
   // emit memset's for anything big enough to be worthwhile.
-  bool MadeChange = false;
+  Instruction *AMemSet = 0;
   for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
        I != E; ++I) {
     const MemsetRange &Range = *I;
-
+
     if (Range.TheStores.size() == 1) continue;
 
     // If it is profitable to lower this range to memset, do so now.
     if (!Range.isProfitableToUseMemset(*TD))
       continue;
 
-    // Otherwise, we do want to transform this!  Create a new memset.  We put
-    // the memset right before the first instruction that isn't part of this
-    // memset block.  This ensure that the memset is dominated by any addressing
-    // instruction needed by the start of the block.
-    BasicBlock::iterator InsertPt = BI;
-
+    // Otherwise, we do want to transform this!  Create a new memset.
     // Get the starting pointer of the block.
     StartPtr = Range.StartPtr;
-
+
     // Determine alignment
     unsigned Alignment = Range.Alignment;
     if (Alignment == 0) {
       const Type *EltType =
-        cast<PointerType>(StartPtr->getType())->getElementType();
+          cast<PointerType>(StartPtr->getType())->getElementType();
       Alignment = TD->getABITypeAlignment(EltType);
     }
-
-    // Cast the start ptr to be i8* as memset requires.
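The lines deleted below are exactly the boilerplate IRBuilder::CreateMemSet now hides: bitcasting the pointer to i8*, declaring the right memset overload, and assembling the five-argument intrinsic call. After this change the whole sequence reduces to one builder call, roughly as follows (a sketch; InsertPt, StartPtr, ByteVal, and the length stand in for the values in the surrounding code):

#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// Sketch: emit memset(StartPtr, ByteVal, Len, Align) before InsertPt.
// CreateMemSet performs the i8* cast and intrinsic declaration itself.
static Value *emitMemSet(Instruction *InsertPt, Value *StartPtr,
                         Value *ByteVal, uint64_t Len, unsigned Align) {
  IRBuilder<> Builder(InsertPt);
  return Builder.CreateMemSet(StartPtr, ByteVal, Len, Align,
                              /*isVolatile=*/false);
}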
- const PointerType* StartPTy = cast<PointerType>(StartPtr->getType()); - const PointerType *i8Ptr = Type::getInt8PtrTy(Context, - StartPTy->getAddressSpace()); - if (StartPTy!= i8Ptr) - StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(), - InsertPt); - - Value *Ops[] = { - StartPtr, ByteVal, // Start, value - // size - ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start), - // align - ConstantInt::get(Type::getInt32Ty(Context), Alignment), - // volatile - ConstantInt::get(Type::getInt1Ty(Context), 0), - }; - const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; - - Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); - - Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt); + + AMemSet = + Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); + DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) - dbgs() << *Range.TheStores[i]; - dbgs() << "With: " << *C); C=C; - - // Don't invalidate the iterator - BBI = BI; - + dbgs() << *Range.TheStores[i] << '\n'; + dbgs() << "With: " << *AMemSet << '\n'); + // Zap all the stores. - for (SmallVector<StoreInst*, 16>::const_iterator + for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), - SE = Range.TheStores.end(); SI != SE; ++SI) + SE = Range.TheStores.end(); SI != SE; ++SI) { + MD->removeInstruction(*SI); (*SI)->eraseFromParent(); + } ++NumMemSetInfer; - MadeChange = true; } - return MadeChange; + return AMemSet; +} + + +bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { + if (SI->isVolatile()) return false; + + if (TD == 0) return false; + + // Detect cases where we're performing call slot forwarding, but + // happen to be using a load-store pair to implement it, rather than + // a memcpy. + if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { + if (!LI->isVolatile() && LI->hasOneUse()) { + MemDepResult dep = MD->getDependency(LI); + CallInst *C = 0; + if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst())) + C = dyn_cast<CallInst>(dep.getInst()); + + if (C) { + bool changed = performCallSlotOptzn(LI, + SI->getPointerOperand()->stripPointerCasts(), + LI->getPointerOperand()->stripPointerCasts(), + TD->getTypeStoreSize(SI->getOperand(0)->getType()), C); + if (changed) { + MD->removeInstruction(SI); + SI->eraseFromParent(); + MD->removeInstruction(LI); + LI->eraseFromParent(); + ++NumMemCpyInstr; + return true; + } + } + } + } + + // There are two cases that are interesting for this code to handle: memcpy + // and memset. Right now we only handle memset. + + // Ensure that the value being stored is something that can be memset'able a + // byte at a time like "0" or "-1" or any width, as well as things like + // 0xA0A0A0A0 and 0.0. + if (Value *ByteVal = isBytewiseValue(SI->getOperand(0))) + if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(), + ByteVal)) { + BBI = I; // Don't invalidate iterator. + return true; + } + + return false; +} + +bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { + // See if there is another memset or store neighboring this memset which + // allows us to widen out the memset to do a single larger store. + if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile()) + if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(), + MSI->getValue())) { + BBI = I; // Don't invalidate iterator. 
+ return true; + } + return false; } /// performCallSlotOptzn - takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having /// the call write its result directly into the destination of the memcpy. -bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { +bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, + Value *cpyDest, Value *cpySrc, + uint64_t cpyLen, CallInst *C) { // The general transformation to keep in mind is // // call @func(..., src, ...) @@ -506,24 +551,15 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Deliberately get the source and destination with bitcasts stripped away, // because we'll need to do type comparisons based on the underlying type. - Value *cpyDest = cpy->getDest(); - Value *cpySrc = cpy->getSource(); CallSite CS(C); - // We need to be able to reason about the size of the memcpy, so we require - // that it be a constant. - ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength()); - if (!cpyLength) - return false; - // Require that src be an alloca. This simplifies the reasoning considerably. AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); if (!srcAlloca) return false; // Check that all of src is copied to dest. - TargetData *TD = getAnalysisIfAvailable<TargetData>(); - if (!TD) return false; + if (TD == 0) return false; ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) @@ -532,7 +568,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); - if (cpyLength->getZExtValue() < srcSize) + if (cpyLen < srcSize) return false; // Check that accessing the first srcSize bytes of dest will not cause a @@ -601,8 +637,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) != - AliasAnalysis::NoModRef) + if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef) return false; // All the checks have passed, so do the transformation. @@ -625,99 +660,142 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. - MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); - MD.removeInstruction(C); + MD->removeInstruction(C); - // Remove the memcpy - MD.removeInstruction(cpy); - cpy->eraseFromParent(); + // Remove the memcpy. + MD->removeInstruction(cpy); ++NumMemCpyInstr; return true; } -/// processMemCpy - perform simplification of memcpy's. If we have memcpy A -/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite -/// B to be a memcpy from X to Z (or potentially a memmove, depending on -/// circumstances). This allows later passes to remove the first memcpy -/// altogether. -bool MemCpyOpt::processMemCpy(MemCpyInst *M) { - MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); - - // The are two possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. 
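performCallSlotOptzn implements the rewrite described in its comment: when a call fills a temporary and a memcpy then moves that temporary into the real destination, let the call write into the destination directly. In source-level terms the transformation looks roughly like this (illustrative only; compute is an assumed callee that fully initializes its argument):

#include <cstring>

struct S { char bytes[64]; };
void compute(S *out);  // assumed callee that writes all of *out

// Before the optimization: the result goes through a temporary.
void beforeOpt(S &dst) {
  S tmp;
  compute(&tmp);
  std::memcpy(&dst, &tmp, sizeof(S));  // the memcpy the pass pattern-matches
}

// After: if tmp is a non-escaping local at least as large as the copy, and
// the callee cannot otherwise access dst, the temporary disappears.
void afterOpt(S &dst) {
  compute(&dst);
}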
-  MemDepResult dep = MD.getDependency(M);
-  if (!dep.isClobber())
-    return false;
-  if (!isa<MemCpyInst>(dep.getInst())) {
-    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
-      return performCallSlotOptzn(M, C);
+/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
+/// memory dependence of memcpy 'M' is the memcpy 'MDep'.  Try to simplify M to
+/// copy from MDep's input if we can.  MSize is the size of M's copy.
+///
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+                                              uint64_t MSize) {
+  // We can only transform memcpy's where the dest of one is the source of the
+  // other.
+  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
     return false;
-  }
-
-  MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
-
-  // We can only transforms memcpy's where the dest of one is the source of the
-  // other
-  if (M->getSource() != MDep->getDest())
+  // If the dep instruction is reading from our current input, then it is a noop
+  // transfer and substituting the input won't change this instruction.  Just
+  // ignore the input and let someone else zap MDep.  This handles cases like:
+  //    memcpy(a <- a)
+  //    memcpy(b <- a)
+  if (M->getSource() == MDep->getSource())
     return false;
 
   // Second, the length of the memcpy's must be the same, or the preceding one
   // must be larger than the following one.
-  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
-  ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
-  if (!C1 || !C2)
-    return false;
-
-  uint64_t DepSize = C1->getValue().getZExtValue();
-  uint64_t CpySize = C2->getValue().getZExtValue();
-
-  if (DepSize < CpySize)
+  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
     return false;
 
-  // Finally, we have to make sure that the dest of the second does not
-  // alias the source of the first
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
-      AliasAnalysis::NoAlias)
+
+  // Verify that the copied-from memory doesn't change in between the two
+  // transfers.  For example, in:
+  //    memcpy(a <- b)
+  //    *b = 42;
+  //    memcpy(c <- a)
+  // It would be invalid to transform the second memcpy into memcpy(c <- b).
+  //
+  // TODO: If the code between M and MDep is transparent to the destination "c",
+  // then we could still perform the xform by moving M up to the first memcpy.
+  //
+  // NOTE: This is conservative, it will stop on any read from the source loc,
+  // not just the defining memcpy.
+  MemDepResult SourceDep =
+    MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
+                                 false, M, M->getParent());
+  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
-  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
-           AliasAnalysis::NoAlias)
+
+  // If the dest of the second might alias the source of the first, then the
+  // source and dest might overlap.  We still want to eliminate the intermediate
+  // value, but we have to generate a memmove instead of memcpy.
+  bool UseMemMove = false;
+  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+    UseMemMove = true;
+
+  // If all checks passed, then we can transform M.
+
+  // Make sure to use the lesser of the alignment of the source and the dest
+  // since we're changing where we're reading from, but don't want to increase
+  // the alignment past what can be read from or written to.
+  // TODO: Is this worth it if we're creating a less aligned memcpy? For
+  // example we could be moving from movaps -> movq on x86.
+  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
+
+  IRBuilder<> Builder(M);
+  if (UseMemMove)
+    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+                          Align, M->isVolatile());
+  else
+    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+                         Align, M->isVolatile());
+
+  // Remove the instruction we're replacing.
+  MD->removeInstruction(M);
+  M->eraseFromParent();
+  ++NumMemCpyInstr;
+  return true;
+}
+
+
+/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+  // We can only optimize statically-sized memcpy's that are non-volatile.
+  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+  if (CopySize == 0 || M->isVolatile()) return false;
+
+  // If the source and destination of the memcpy are the same, then zap it.
+  if (M->getSource() == M->getDest()) {
+    MD->removeInstruction(M);
+    M->eraseFromParent();
     return false;
-  }
+  }
+
+  // If copying from a constant, try to turn the memcpy into a memset.
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+    if (GV->isConstant() && GV->hasDefinitiveInitializer())
+      if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+        IRBuilder<> Builder(M);
+        Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+                             M->getAlignment(), false);
+        MD->removeInstruction(M);
+        M->eraseFromParent();
+        ++NumCpyToSet;
+        return true;
+      }
+
+  // There are two possible optimizations we can do for memcpy:
+  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
+  //   b) call-memcpy xform for return slot optimization.
+  MemDepResult DepInfo = MD->getDependency(M);
+  if (!DepInfo.isClobber())
     return false;
 
-  // If all checks passed, then we can transform these memcpy's
-  const Type *ArgTys[3] = { M->getRawDest()->getType(),
-                            MDep->getRawSource()->getType(),
-                            M->getLength()->getType() };
-  Function *MemCpyFun = Intrinsic::getDeclaration(
-                                 M->getParent()->getParent()->getParent(),
-                                 M->getIntrinsicID(), ArgTys, 3);
+  if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
+    return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
 
-  Value *Args[5] = {
-    M->getRawDest(), MDep->getRawSource(), M->getLength(),
-    M->getAlignmentCst(), M->getVolatileCst()
-  };
-
-  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
-
-
-  // If C and M don't interfere, then this is a valid transformation.  If they
-  // did, this would mean that the two sources overlap, which would be bad.
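One of the new processMemCpy cases above rewrites a memcpy whose source is a constant global with a byte-splat initializer into a memset. At the source level the payoff looks like this (illustrative):

#include <cstring>

static const char kZeros[32] = {0};   // constant, byte-splattable initializer

void clearBuf(char *buf) {
  // Before: copies from a constant global whose bytes are all identical...
  std::memcpy(buf, kZeros, sizeof(kZeros));
  // ...which the pass can rewrite to the equivalent, cheaper:
  //   std::memset(buf, 0, sizeof(kZeros));
}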
-  if (MD.getDependency(C) == dep) {
-    MD.removeInstruction(M);
-    M->eraseFromParent();
-    ++NumMemCpyInstr;
-    return true;
+  if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+    if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+                             CopySize->getZExtValue(), C)) {
+      MD->removeInstruction(M);
+      M->eraseFromParent();
+      return true;
+    }
   }

-  // Otherwise, there was no point in doing this, so we remove the call we
-  // inserted and act like nothing happened.
-  MD.removeInstruction(C);
-  C->eraseFromParent();
   return false;
 }

@@ -726,15 +804,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
 bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

-  // If the memmove is a constant size, use it for the alias query, this allows
-  // us to optimize things like: memmove(P, P+64, 64);
-  uint64_t MemMoveSize = ~0ULL;
-  if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
-    MemMoveSize = Len->getZExtValue();
-
-  // See if the pointers alias.
-  if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
-      AliasAnalysis::NoAlias)
+  // See if the pointers alias.
+  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
     return false;

   DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
@@ -749,33 +820,107 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {

   // MemDep may have overly conservative information about this instruction;
   // just conservatively flush it from the cache.
-  getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
+  MD->removeInstruction(M);

   ++NumMoveToCpy;
   return true;
 }

+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+  if (TD == 0) return false;
+
+  // Find out what feeds this byval argument.
+  Value *ByValArg = CS.getArgument(ArgNo);
+  const Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
+  uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+  MemDepResult DepInfo =
+    MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
+                                 true, CS.getInstruction(),
+                                 CS.getInstruction()->getParent());
+  if (!DepInfo.isClobber())
+    return false;
+
+  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
+  // a memcpy, see if we can byval from the source of the memcpy instead of the
+  // result.
+  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+  if (MDep == 0 || MDep->isVolatile() ||
+      ByValArg->stripPointerCasts() != MDep->getDest())
+    return false;
+
+  // The length of the memcpy must be larger than or equal to the size of the
+  // byval.
+  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+  if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+    return false;
+
+  // Get the alignment of the byval.  If it is greater than the memcpy, then we
+  // can't do the substitution.  If the call doesn't specify the alignment, then
+  // it is some target specific value that we can't know.
+  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+  if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
+    return false;
+
+  // Verify that the copied-from memory doesn't change in between the memcpy and
+  // the byval call.
+  //    memcpy(a <- b)
+  //    *b = 42;
+  //    foo(*a)
+  // It would be invalid to transform the call into foo(*b).
+  //
+  // NOTE: This is conservative, it will stop on any read from the source loc,
+  // not just the defining memcpy.
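[Editor's note: in source terms, processByValArgument feeds a byval argument from the copy's source instead of the temporary, assuming nothing writes the source in between. A sketch with hypothetical types, under a typical struct-by-value ABI lowering:]

    struct Big { int v[32]; };

    static int callee(Big b) {    // 'b' is lowered as a byval pointer argument
      return b.v[0];
    }

    int caller(Big *src) {
      Big tmp = *src;             // lowered to memcpy(tmp <- *src)
      return callee(tmp);         // the byval slot can be filled from *src
    }                             // directly if *src is unchanged in between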
+ MemDepResult SourceDep = + MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep), + false, CS.getInstruction(), MDep->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + + Value *TmpCast = MDep->getSource(); + if (MDep->getSource()->getType() != ByValArg->getType()) + TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), + "tmpcast", CS.getInstruction()); + + DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" + << " " << *MDep << "\n" + << " " << *CS.getInstruction() << "\n"); + + // Otherwise we're good! Update the byval argument. + CS.setArgument(ArgNo, TmpCast); + ++NumMemCpyInstr; + return true; +} -// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN. +/// iterateOnFunction - Executes one iteration of MemCpyOpt. bool MemCpyOpt::iterateOnFunction(Function &F) { bool MadeChange = false; // Walk all instruction in the function. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE;) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { // Avoid invalidating the iterator. Instruction *I = BI++; + bool RepeatInstruction = false; + if (StoreInst *SI = dyn_cast<StoreInst>(I)) MadeChange |= processStore(SI, BI); + else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) + RepeatInstruction = processMemSet(M, BI); else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) - MadeChange |= processMemCpy(M); - else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) { - if (processMemMove(M)) { - --BI; // Reprocess the new memcpy. - MadeChange = true; - } + RepeatInstruction = processMemCpy(M); + else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) + RepeatInstruction = processMemMove(M); + else if (CallSite CS = (Value*)I) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.paramHasAttr(i+1, Attribute::ByVal)) + MadeChange |= processByValArgument(CS, i); + } + + // Reprocess the instruction if desired. + if (RepeatInstruction) { + if (BI != BB->begin()) --BI; + MadeChange = true; } } } @@ -788,14 +933,14 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { // bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; + MD = &getAnalysis<MemoryDependenceAnalysis>(); + TD = getAnalysisIfAvailable<TargetData>(); while (1) { if (!iterateOnFunction(F)) break; MadeChange = true; } + MD = 0; return MadeChange; } - - - diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index b8afcc1..e093b52 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -77,7 +77,9 @@ namespace { bool MadeChange; public: static char ID; // Pass identification, replacement for typeid - Reassociate() : FunctionPass(ID) {} + Reassociate() : FunctionPass(ID) { + initializeReassociatePass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &F); @@ -104,7 +106,7 @@ namespace { char Reassociate::ID = 0; INITIALIZE_PASS(Reassociate, "reassociate", - "Reassociate expressions", false, false); + "Reassociate expressions", false, false) // Public interface to the Reassociate pass FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } @@ -238,6 +240,12 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) { RHS->setOperand(0, LHS); I->setOperand(0, RHS); + // Conservatively clear all the optional flags, which may not hold + // after the reassociation. 
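[Editor's note: an association that never overflows can become one that does, which is why the flags below are cleared. A hedged C++ analogue, using 'int' whose signed overflow is undefined much like nsw:]

    #include <climits>

    int reassoc(int a) {        // suppose a == INT_MAX at runtime
      return (a + -1) + 1;      // fine: no step overflows
      // Regrouped as (a + 1) + -1, the first step overflows, so the optimizer
      // cannot carry the original no-signed-wrap flags onto the new grouping.
    }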
+ I->clearSubclassOptionalData(); + LHS->clearSubclassOptionalData(); + RHS->clearSubclassOptionalData(); + ++NumLinear; MadeChange = true; DEBUG(dbgs() << "Linearized: " << *I << '\n'); @@ -339,6 +347,12 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, DEBUG(dbgs() << "RA: " << *I << '\n'); I->setOperand(0, Ops[i].Op); I->setOperand(1, Ops[i+1].Op); + + // Clear all the optional flags, which may not hold after the + // reassociation if the expression involved more than just this operation. + if (Ops.size() != 2) + I->clearSubclassOptionalData(); + DEBUG(dbgs() << "TO: " << *I << '\n'); MadeChange = true; ++NumChanged; @@ -354,6 +368,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, if (I->getOperand(1) != Ops[i].Op) { DEBUG(dbgs() << "RA: " << *I << '\n'); I->setOperand(1, Ops[i].Op); + + // Conservatively clear all the optional flags, which may not hold + // after the reassociation. + I->clearSubclassOptionalData(); + DEBUG(dbgs() << "TO: " << *I << '\n'); MadeChange = true; ++NumChanged; @@ -809,16 +828,23 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // RemoveFactorFromExpression on successive values to behave differently. Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); SmallVector<Value*, 4> NewMulOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) continue; if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { - NewMulOps.push_back(V); - Ops.erase(Ops.begin()+i); - --i; --e; + // The factorized operand may occur several times. Convert them all in + // one fell swoop. + for (unsigned j = Ops.size(); j != i;) { + --j; + if (Ops[j].Op == Ops[i].Op) { + NewMulOps.push_back(V); + Ops.erase(Ops.begin()+j); + } + } + --i; } } diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp index 506b72a..459bb06 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp @@ -36,7 +36,9 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); namespace { struct RegToMem : public FunctionPass { static char ID; // Pass identification, replacement for typeid - RegToMem() : FunctionPass(ID) {} + RegToMem() : FunctionPass(ID) { + initializeRegToMemPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(BreakCriticalEdgesID); @@ -59,9 +61,11 @@ namespace { } char RegToMem::ID = 0; -INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots", - false, false); - +INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots", + false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots", + false, false) bool RegToMem::runOnFunction(Function &F) { if (F.isDeclaration()) diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index 6115c05..c82e929 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -481,6 +481,19 @@ private: } } + /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS + /// map for I and PN, but if one is there already, do not create another. 
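[Editor's note: the comment above motivates a membership scan before insertion; the same dedup idiom as a freestanding sketch, with toy key/value types rather than the pass's own:]

    #include <map>
    #include <utility>

    // Insert (Key, Val) only if that exact pair is absent, avoiding the
    // duplicate-driven table growth described above.
    template <typename K, typename V>
    void InsertUnique(std::multimap<K, V> &MM, const K &Key, const V &Val) {
      typename std::multimap<K, V>::iterator J = MM.lower_bound(Key);
      typename std::multimap<K, V>::iterator E = MM.upper_bound(Key);
      for (; J != E; ++J)
        if (J->second == Val)
          return;
      MM.insert(std::make_pair(Key, Val));
    }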
+ /// (Duplicate entries do not break anything directly, but can lead to + /// exponential growth of the table in rare cases.) + void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) { + std::multimap<PHINode*, Instruction*>::iterator J, E; + tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN); + for (; J != E; ++J) + if (J->second == I) + return; + UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I)); + } + private: friend class InstVisitor<SCCPSolver>; @@ -973,9 +986,9 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { if (Result.isConstant()) { markConstant(IV, &I, Result.getConstant()); // Remember that this instruction is virtually using the PHI node - // operands. - UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I)); - UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I)); + // operands. + InsertInOverdefinedPHIs(&I, PN1); + InsertInOverdefinedPHIs(&I, PN2); return; } @@ -1056,8 +1069,8 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { markConstant(&I, Result.getConstant()); // Remember that this instruction is virtually using the PHI node // operands. - UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I)); - UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I)); + InsertInOverdefinedPHIs(&I, PN1); + InsertInOverdefinedPHIs(&I, PN2); return; } @@ -1585,22 +1598,20 @@ namespace { /// struct SCCP : public FunctionPass { static char ID; // Pass identification, replacement for typeid - SCCP() : FunctionPass(ID) {} + SCCP() : FunctionPass(ID) { + initializeSCCPPass(*PassRegistry::getPassRegistry()); + } // runOnFunction - Run the Sparse Conditional Constant Propagation // algorithm, and return true if the function was modified. // bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - } }; } // end anonymous namespace char SCCP::ID = 0; INITIALIZE_PASS(SCCP, "sccp", - "Sparse Conditional Constant Propagation", false, false); + "Sparse Conditional Constant Propagation", false, false) // createSCCPPass - This is the public interface to this file. FunctionPass *llvm::createSCCPPass() { @@ -1701,7 +1712,9 @@ namespace { /// struct IPSCCP : public ModulePass { static char ID; - IPSCCP() : ModulePass(ID) {} + IPSCCP() : ModulePass(ID) { + initializeIPSCCPPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); }; } // end anonymous namespace @@ -1709,7 +1722,7 @@ namespace { char IPSCCP::ID = 0; INITIALIZE_PASS(IPSCCP, "ipsccp", "Interprocedural Sparse Conditional Constant Propagation", - false, false); + false, false) // createIPSCCPPass - This is the public interface to this file. ModulePass *llvm::createIPSCCPPass() { diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp index cb03423..bf9ca6d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -7,12 +7,15 @@ // //===----------------------------------------------------------------------===// // -// This file implements the C bindings for libLLVMScalarOpts.a, which implements -// several scalar transformations over the LLVM intermediate representation. +// This file implements common infrastructure for libLLVMScalarOpts.a, which +// implements several scalar transformations over the LLVM intermediate +// representation, including the C bindings for that library. 
// //===----------------------------------------------------------------------===// #include "llvm-c/Transforms/Scalar.h" +#include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Target/TargetData.h" @@ -20,6 +23,50 @@ using namespace llvm; +/// initializeScalarOptsPasses - Initialize all passes linked into the +/// ScalarOpts library. +void llvm::initializeScalarOpts(PassRegistry &Registry) { + initializeADCEPass(Registry); + initializeBlockPlacementPass(Registry); + initializeCodeGenPreparePass(Registry); + initializeConstantPropagationPass(Registry); + initializeCorrelatedValuePropagationPass(Registry); + initializeDCEPass(Registry); + initializeDeadInstEliminationPass(Registry); + initializeDSEPass(Registry); + initializeGEPSplitterPass(Registry); + initializeGVNPass(Registry); + initializeEarlyCSEPass(Registry); + initializeIndVarSimplifyPass(Registry); + initializeJumpThreadingPass(Registry); + initializeLICMPass(Registry); + initializeLoopDeletionPass(Registry); + initializeLoopInstSimplifyPass(Registry); + initializeLoopRotatePass(Registry); + initializeLoopStrengthReducePass(Registry); + initializeLoopUnrollPass(Registry); + initializeLoopUnswitchPass(Registry); + initializeLoopIdiomRecognizePass(Registry); + initializeLowerAtomicPass(Registry); + initializeMemCpyOptPass(Registry); + initializeReassociatePass(Registry); + initializeRegToMemPass(Registry); + initializeSCCPPass(Registry); + initializeIPSCCPPass(Registry); + initializeSROA_DTPass(Registry); + initializeSROA_SSAUpPass(Registry); + initializeCFGSimplifyPassPass(Registry); + initializeSimplifyHalfPowrLibCallsPass(Registry); + initializeSimplifyLibCallsPass(Registry); + initializeSinkingPass(Registry); + initializeTailDupPass(Registry); + initializeTailCallElimPass(Registry); +} + +void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { + initializeScalarOpts(*unwrap(R)); +} + void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createAggressiveDCEPass()); } @@ -56,10 +103,6 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopDeletionPass()); } -void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLoopIndexSplitPass()); -} - void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopRotatePass()); } diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index fee317d..c3ca852 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -31,28 +31,34 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); 
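[Editor's note: a plausible C-bindings client of the new LLVMInitializeScalarOpts entry point. The surrounding setup, including LLVMGetGlobalPassRegistry and the specific passes added, is assumed from the llvm-c headers of this era and is not shown in the patch:]

    #include "llvm-c/Core.h"
    #include "llvm-c/Initialization.h"
    #include "llvm-c/Transforms/Scalar.h"

    void setupScalarOpts(LLVMPassManagerRef PM) {
      /* Register the ScalarOpts passes with the global registry once, then
         populate the pass manager through the C API. */
      LLVMInitializeScalarOpts(LLVMGetGlobalPassRegistry());
      LLVMAddAggressiveDCEPass(PM);
      LLVMAddCFGSimplificationPass(PM);
    }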
+STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - explicit SROA(signed T = -1) : FunctionPass(ID) { + SROA(int T, bool hasDT, char &ID) + : FunctionPass(ID), HasDomTree(hasDT) { if (T == -1) SRThreshold = 128; else @@ -64,17 +70,10 @@ namespace { bool performScalarRepl(Function &F); bool performPromotion(Function &F); - // getAnalysisUsage - This pass does not require any passes, but we know it - // will not alter the CFG, so say so. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); - AU.setPreservesCFG(); - } - private: + bool HasDomTree; TargetData *TD; - + /// DeadInsts - Keep track of instructions we have made dead, so that /// we can remove them after we are done working. SmallVector<Value*, 32> DeadInsts; @@ -83,39 +82,61 @@ namespace { /// information about the uses. All these fields are initialized to false /// and set to true when something is learned. struct AllocaInfo { + /// The alloca to promote. + AllocaInst *AI; + + /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite + /// looping and avoid redundant work. + SmallPtrSet<PHINode*, 8> CheckedPHIs; + /// isUnsafe - This is set to true if the alloca cannot be SROA'd. bool isUnsafe : 1; - + /// isMemCpySrc - This is true if this aggregate is memcpy'd from. bool isMemCpySrc : 1; /// isMemCpyDst - This is true if this aggregate is memcpy'd into. bool isMemCpyDst : 1; - AllocaInfo() - : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {} + /// hasSubelementAccess - This is true if a subelement of the alloca is + /// ever accessed, or false if the alloca is only accessed with mem + /// intrinsics or load/store that only access the entire alloca at once. + bool hasSubelementAccess : 1; + + /// hasALoadOrStore - This is true if there are any loads or stores to it. + /// The alloca may just be accessed with memcpy, for example, which would + /// not set this. 
+ bool hasALoadOrStore : 1; + + explicit AllocaInfo(AllocaInst *ai) + : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false), + hasSubelementAccess(false), hasALoadOrStore(false) {} }; - + unsigned SRThreshold; - void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; } + void MarkUnsafe(AllocaInfo &I, Instruction *User) { + I.isUnsafe = true; + DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n'); + } bool isSafeAllocaToScalarRepl(AllocaInst *AI); - void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - AllocaInfo &Info); - void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, - AllocaInfo &Info); - void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, AllocaInfo &Info); + void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info); + void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset, + AllocaInfo &Info); + void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info); + void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, + const Type *MemOpType, bool isStore, AllocaInfo &Info, + Instruction *TheAccess, bool AllowWholeAccess); bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, const Type *&IdxTy); - - void DoScalarReplacement(AllocaInst *AI, + + void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); void DeleteDeadInstructions(); - + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, @@ -129,18 +150,63 @@ namespace { SmallVector<AllocaInst*, 32> &NewElts); void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); - + static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); }; + + // SROA_DT - SROA that uses DominatorTree. + struct SROA_DT : public SROA { + static char ID; + public: + SROA_DT(int T = -1) : SROA(T, true, ID) { + initializeSROA_DTPass(*PassRegistry::getPassRegistry()); + } + + // getAnalysisUsage - This pass does not require any passes, but we know it + // will not alter the CFG, so say so. + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); + } + }; + + // SROA_SSAUp - SROA that uses SSAUpdater. + struct SROA_SSAUp : public SROA { + static char ID; + public: + SROA_SSAUp(int T = -1) : SROA(T, false, ID) { + initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry()); + } + + // getAnalysisUsage - This pass does not require any passes, but we know it + // will not alter the CFG, so say so. 
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + }; + } -char SROA::ID = 0; -INITIALIZE_PASS(SROA, "scalarrepl", - "Scalar Replacement of Aggregates", false, false); +char SROA_DT::ID = 0; +char SROA_SSAUp::ID = 0; + +INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl", + "Scalar Replacement of Aggregates (DT)", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(SROA_DT, "scalarrepl", + "Scalar Replacement of Aggregates (DT)", false, false) + +INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa", + "Scalar Replacement of Aggregates (SSAUp)", false, false) +INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa", + "Scalar Replacement of Aggregates (SSAUp)", false, false) // Public interface to the ScalarReplAggregates pass -FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { - return new SROA(Threshold); +FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold, + bool UseDomTree) { + if (UseDomTree) + return new SROA_DT(Threshold); + return new SROA_SSAUp(Threshold); } @@ -156,16 +222,16 @@ class ConvertToScalarInfo { /// AllocaSize - The size of the alloca being considered. unsigned AllocaSize; const TargetData &TD; - + /// IsNotTrivial - This is set to true if there is some access to the object /// which means that mem2reg can't promote it. bool IsNotTrivial; - + /// VectorTy - This tracks the type that we should promote the vector to if /// it is possible to turn it into a vector. This starts out null, and if it /// isn't possible to turn into a vector type, it gets set to VoidTy. const Type *VectorTy; - + /// HadAVector - True if there is at least one vector access to the alloca. /// We don't want to turn random arrays into vectors and use vector element /// insert/extract, but if there are element accesses to something that is @@ -179,14 +245,14 @@ public: VectorTy = 0; HadAVector = false; } - + AllocaInst *TryConvert(AllocaInst *AI); - + private: bool CanConvertToScalar(Value *V, uint64_t Offset); void MergeInType(const Type *In, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); - + Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, uint64_t Offset, IRBuilder<> &Builder); Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, @@ -195,26 +261,6 @@ private: } // end anonymous namespace. -/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't -/// allowed to form. We do this to avoid MMX types, which is a complete hack, -/// but is required until the backend is fixed. -static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) { - StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple()); - if (!Triple.startswith("i386") && - !Triple.startswith("x86_64")) - return false; - - // Reject all the MMX vector types. - switch (VTy->getNumElements()) { - default: return false; - case 1: return VTy->getElementType()->isIntegerTy(64); - case 2: return VTy->getElementType()->isIntegerTy(32); - case 4: return VTy->getElementType()->isIntegerTy(16); - case 8: return VTy->getElementType()->isIntegerTy(8); - } -} - - /// TryConvert - Analyze the specified alloca, and if it is safe to do so, /// rewrite it to be a new alloca which is mem2reg'able. This returns the new /// alloca if possible or null if not. @@ -223,7 +269,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // out. 
if (!CanConvertToScalar(AI, 0) || !IsNotTrivial) return 0; - + // If we were able to find a vector type that can handle this with // insert/extract elements, and if there was at least one use that had // a vector type, promote this to a vector. We don't want to promote @@ -231,8 +277,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. const Type *NewTy; - if (VectorTy && VectorTy->isVectorTy() && HadAVector && - !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) { + if (VectorTy && VectorTy->isVectorTy() && HadAVector) { DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. @@ -263,7 +308,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { // nothing to be done. if (VectorTy && VectorTy->isVoidTy()) return; - + // If this could be contributing to a vector, analyze it. // If the In type is a vector that is the same size as the alloca, see if it @@ -271,7 +316,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { // Remember if we saw a vector type. HadAVector = true; - + if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { // If we're storing/loading a vector of the right size, allow it as a // vector. If this the first vector we see, remember the type so that @@ -290,7 +335,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { // compatible with it. unsigned EltSize = In->getPrimitiveSizeInBits()/8; if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (VectorTy == 0 || + (VectorTy == 0 || cast<VectorType>(VectorTy)->getElementType() ->getPrimitiveSizeInBits()/8 == EltSize)) { if (VectorTy == 0) @@ -298,7 +343,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { return; } } - + // Otherwise, we have a case that we can't handle with an optimized vector // form. We can still turn this into a large integer. VectorTy = Type::getVoidTy(In->getContext()); @@ -316,22 +361,28 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { Instruction *User = cast<Instruction>(*UI); - + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // Don't break volatile loads. if (LI->isVolatile()) return false; + // Don't touch MMX operations. + if (LI->getType()->isX86_MMXTy()) + return false; MergeInType(LI->getType(), Offset); continue; } - + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Storing the pointer, not into the value? if (SI->getOperand(0) == V || SI->isVolatile()) return false; + // Don't touch MMX operations. + if (SI->getOperand(0)->getType()->isX86_MMXTy()) + return false; MergeInType(SI->getOperand(0)->getType(), Offset); continue; } - + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { IsNotTrivial = true; // Can't be mem2reg'd. if (!CanConvertToScalar(BCI, Offset)) @@ -343,7 +394,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // If this is a GEP with a variable indices, we can't handle it. if (!GEP->hasAllConstantIndices()) return false; - + // Compute the offset that this GEP adds to the pointer. 
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), @@ -372,15 +423,15 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()); if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) return false; - + IsNotTrivial = true; // Can't be mem2reg'd. continue; } - + // Otherwise, we cannot handle this! return false; } - + return true; } @@ -411,9 +462,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, GEP->eraseFromParent(); continue; } - - IRBuilder<> Builder(User->getParent(), User); - + + IRBuilder<> Builder(User); + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // The load is a bit extract from NewAI shifted right by Offset bits. Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); @@ -423,7 +474,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, LI->eraseFromParent(); continue; } - + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); @@ -431,14 +482,14 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, Builder); Builder.CreateStore(New, NewAI); SI->eraseFromParent(); - + // If the load we just inserted is now dead, then the inserted store // overwrote the entire thing. if (Old->use_empty()) Old->eraseFromParent(); continue; } - + // If this is a constant sized memset of a constant value (e.g. 0) we can // transform it into a store of the expanded constant value. if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { @@ -446,7 +497,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); if (NumBytes != 0) { unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); - + // Compute the value replicated the right number of times. APInt APVal(NumBytes*8, Val); @@ -454,17 +505,17 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (Val) for (unsigned i = 1; i != NumBytes; ++i) APVal |= APVal << 8; - + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), Old, Offset, Builder); Builder.CreateStore(New, NewAI); - + // If the load we just inserted is now dead, then the memset overwrote // the entire thing. if (Old->use_empty()) - Old->eraseFromParent(); + Old->eraseFromParent(); } MSI->eraseFromParent(); continue; @@ -474,29 +525,42 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // can handle it like a load or store of the scalar type. if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { assert(Offset == 0 && "must be store to start of alloca"); - + // If the source and destination are both to the same alloca, then this is // a noop copy-to-self, just delete it. Otherwise, emit a load and store // as appropriate. - AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0)); - - if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { + AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0)); + + if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) { // Dest must be OrigAI, change this to be a load from the original // pointer (bitcasted), then a store to our new alloca. 
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); Value *SrcPtr = MTI->getSource(); - SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType()); - + const PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); + const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) { + AIPTy = PointerType::get(AIPTy->getElementType(), + SPTy->getAddressSpace()); + } + SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy); + LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); SrcVal->setAlignment(MTI->getAlignment()); Builder.CreateStore(SrcVal, NewAI); - } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { + } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) { // Src must be OrigAI, change this to be a load from NewAI then a store // through the original dest pointer (bitcasted). assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); - Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType()); + const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); + const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) { + AIPTy = PointerType::get(AIPTy->getElementType(), + DPTy->getAddressSpace()); + } + Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy); + StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); NewStore->setAlignment(MTI->getAlignment()); } else { @@ -506,7 +570,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, MTI->eraseFromParent(); continue; } - + llvm_unreachable("Unsupported operation!"); } } @@ -548,7 +612,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, V = Builder.CreateBitCast(V, ToType, "tmp"); return V; } - + // If ToType is a first class aggregate, extract out each of the pieces and // use insertvalue's to form the FCA. if (const StructType *ST = dyn_cast<StructType>(ToType)) { @@ -562,7 +626,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, } return Res; } - + if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); Value *Res = UndefValue::get(AT); @@ -598,7 +662,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, ConstantInt::get(FromVal->getType(), ShAmt), "tmp"); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateShl(FromVal, + FromVal = Builder.CreateShl(FromVal, ConstantInt::get(FromVal->getType(), -ShAmt), "tmp"); @@ -606,11 +670,11 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, unsigned LIBitWidth = TD.getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) FromVal = - Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), + Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), LIBitWidth), "tmp"); else if (LIBitWidth > NTy->getBitWidth()) FromVal = - Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), + Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), LIBitWidth), "tmp"); // If the result is an integer, this is a trunc or bitcast. 
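[Editor's note: the ShAmt logic above is byte-offset-times-eight bit arithmetic. A worked little-endian example with hypothetical field sizes:]

    #include <cstdint>

    // An 8-byte alloca promoted to a single i64: reading the 16-bit field at
    // byte offset 2 becomes a right shift by 2*8 bits followed by a truncate.
    uint16_t extractAtByte2(uint64_t Whole) {
      return static_cast<uint16_t>(Whole >> 16);
    }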
@@ -647,7 +711,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy); uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType()); - + // Changing the whole vector with memset or with an access of a different // vector type? if (ValSize == VecSize) @@ -657,28 +721,28 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // Must be an element insertion. unsigned Elt = Offset/EltSize; - + if (SV->getType() != VTy->getElementType()) SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); - - SV = Builder.CreateInsertElement(Old, SV, + + SV = Builder.CreateInsertElement(Old, SV, ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); return SV; } - + // If SV is a first-class aggregate value, insert each value recursively. if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { const StructLayout &Layout = *TD.getStructLayout(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); - Old = ConvertScalar_InsertValue(Elt, Old, + Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), Builder); } return Old; } - + if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { @@ -778,16 +842,298 @@ bool SROA::runOnFunction(Function &F) { return Changed; } +namespace { +class AllocaPromoter : public LoadAndStorePromoter { + AllocaInst *AI; +public: + AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S) + : LoadAndStorePromoter(Insts, S), AI(0) {} + + void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { + // Remember which alloca we're promoting (for isInstInList). + this->AI = AI; + LoadAndStorePromoter::run(Insts); + AI->eraseFromParent(); + } + + virtual bool isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &Insts) const { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getOperand(0) == AI; + return cast<StoreInst>(I)->getPointerOperand() == AI; + } +}; +} // end anon namespace + +/// isSafeSelectToSpeculate - Select instructions that use an alloca and are +/// subsequently loaded can be rewritten to load both input pointers and then +/// select between the result, allowing the load of the alloca to be promoted. +/// From this: +/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other +/// %V = load i32* %P2 +/// to: +/// %V1 = load i32* %Alloca -> will be mem2reg'd +/// %V2 = load i32* %Other +/// %V = select i1 %cond, i32 %V1, i32 %V2 +/// +/// We can do this to a select if its only uses are loads and if the operand to +/// the select can be loaded unconditionally. +static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { + bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); + bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); + + for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast<LoadInst>(*UI); + if (LI == 0 || LI->isVolatile()) return false; + + // Both operands to the select need to be dereferencable, either absolutely + // (e.g. allocas) or at this point because we can see other accesses to it. 
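[Editor's note: the dereferenceability requirement above exists because the rewrite issues both loads unconditionally. In source terms, with hypothetical code:]

    int before(bool c, int *a, int *b) {
      int *p = c ? a : b;     // select feeding a load
      return *p;
    }

    int after(bool c, int *a, int *b) {
      int va = *a, vb = *b;   // both loads execute, so both pointers must be
      return c ? va : vb;     // safe; the value is then selected, and an
    }                         // alloca behind 'a' can be mem2reg'd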
+  if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+                                                  LI->getAlignment(), TD))
+    return false;
+  if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+                                                  LI->getAlignment(), TD))
+    return false;
+  }
+
+  return true;
+}
+
+/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers in the pred
+/// blocks and then PHI the results, allowing the load of the alloca to be
+/// promoted.
+/// From this:
+///   %P2 = phi [i32* %Alloca, i32* %Other]
+///   %V = load i32* %P2
+/// to:
+///   %V1 = load i32* %Alloca      -> will be mem2reg'd
+///   ...
+///   %V2 = load i32* %Other
+///   ...
+///   %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a PHI if its only uses are loads and if the operands to
+/// the PHI can be loaded unconditionally.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+  // For now, we can only do this promotion if the load is in the same block as
+  // the PHI, and if there are no stores between the phi and load.
+  // TODO: Allow recursive phi users.
+  // TODO: Allow stores.
+  BasicBlock *BB = PN->getParent();
+  unsigned MaxAlign = 0;
+  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+       UI != UE; ++UI) {
+    LoadInst *LI = dyn_cast<LoadInst>(*UI);
+    if (LI == 0 || LI->isVolatile()) return false;
+
+    // For now we only allow loads in the same block as the PHI.  This is a
+    // common case that happens when instcombine merges two loads through a PHI.
+    if (LI->getParent() != BB) return false;
+
+    // Ensure that there are no instructions between the PHI and the load that
+    // could store.
+    for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+      if (BBI->mayWriteToMemory())
+        return false;
+
+    MaxAlign = std::max(MaxAlign, LI->getAlignment());
+  }
+
+  // Okay, we know that we have one or more loads in the same block as the PHI.
+  // We can transform this if it is safe to push the loads into the predecessor
+  // blocks.  The only thing to watch out for is that we can't put a possibly
+  // trapping load in the predecessor if it is a critical edge.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    BasicBlock *Pred = PN->getIncomingBlock(i);
+
+    // If the predecessor has a single successor, then the edge isn't critical.
+    if (Pred->getTerminator()->getNumSuccessors() == 1)
+      continue;
+
+    Value *InVal = PN->getIncomingValue(i);
+
+    // If the InVal is an invoke in the pred, we can't put a load on the edge.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+      if (II->getParent() == Pred)
+        return false;
+
+    // If this pointer is always safe to load, or if we can prove that there is
+    // already a load in the block, then we can move the load to the pred block.
+    if (InVal->isDereferenceablePointer() ||
+        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+      continue;
+
+    return false;
+  }
+
+  return true;
+}
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it.  If the alloca is close but
+/// not quite there, this will transform the code to allow promotion.  As such,
+/// it is a non-pure predicate.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { + SetVector<Instruction*, SmallVector<Instruction*, 4>, + SmallPtrSet<Instruction*, 4> > InstsToRewrite; + + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + if (LI->isVolatile()) + return false; + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == AI || SI->isVolatile()) + return false; // Don't allow a store OF the AI, only INTO the AI. + continue; + } + + if (SelectInst *SI = dyn_cast<SelectInst>(U)) { + // If the condition being selected on is a constant, fold the select, yes + // this does (rarely) happen early on. + if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) { + Value *Result = SI->getOperand(1+CI->isZero()); + SI->replaceAllUsesWith(Result); + SI->eraseFromParent(); + + // This is very rare and we just scrambled the use list of AI, start + // over completely. + return tryToMakeAllocaBePromotable(AI, TD); + } + + // If it is safe to turn "load (select c, AI, ptr)" into a select of two + // loads, then we can transform this by rewriting the select. + if (!isSafeSelectToSpeculate(SI, TD)) + return false; + + InstsToRewrite.insert(SI); + continue; + } + + if (PHINode *PN = dyn_cast<PHINode>(U)) { + if (PN->use_empty()) { // Dead PHIs can be stripped. + InstsToRewrite.insert(PN); + continue; + } + + // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads + // in the pred blocks, then we can transform this by rewriting the PHI. + if (!isSafePHIToSpeculate(PN, TD)) + return false; + + InstsToRewrite.insert(PN); + continue; + } + + return false; + } + + // If there are no instructions to rewrite, then all uses are load/stores and + // we're done! + if (InstsToRewrite.empty()) + return true; + + // If we have instructions that need to be rewritten for this to be promotable + // take care of it now. + for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { + if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) { + // Selects in InstsToRewrite only have load uses. Rewrite each as two + // loads with a new select. + while (!SI->use_empty()) { + LoadInst *LI = cast<LoadInst>(SI->use_back()); + + IRBuilder<> Builder(LI); + LoadInst *TrueLoad = + Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); + LoadInst *FalseLoad = + Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t"); + + // Transfer alignment and TBAA info if present. + TrueLoad->setAlignment(LI->getAlignment()); + FalseLoad->setAlignment(LI->getAlignment()); + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { + TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + } + + Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); + V->takeName(LI); + LI->replaceAllUsesWith(V); + LI->eraseFromParent(); + } + + // Now that all the loads are gone, the select is gone too. + SI->eraseFromParent(); + continue; + } + + // Otherwise, we have a PHI node which allows us to push the loads into the + // predecessors. + PHINode *PN = cast<PHINode>(InstsToRewrite[i]); + if (PN->use_empty()) { + PN->eraseFromParent(); + continue; + } + + const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN); + + // Get the TBAA tag and alignment to use from one of the loads. 
It doesn't + // matter which one we get and if any differ, it doesn't matter. + LoadInst *SomeLoad = cast<LoadInst>(PN->use_back()); + MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); + unsigned Align = SomeLoad->getAlignment(); + + // Rewrite all loads of the PN to use the new PHI. + while (!PN->use_empty()) { + LoadInst *LI = cast<LoadInst>(PN->use_back()); + LI->replaceAllUsesWith(NewPN); + LI->eraseFromParent(); + } + + // Inject loads into all of the pred blocks. Keep track of which blocks we + // insert them into in case we have multiple edges from the same block. + DenseMap<BasicBlock*, LoadInst*> InsertedLoads; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + LoadInst *&Load = InsertedLoads[Pred]; + if (Load == 0) { + Load = new LoadInst(PN->getIncomingValue(i), + PN->getName() + "." + Pred->getName(), + Pred->getTerminator()); + Load->setAlignment(Align); + if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); + } + + NewPN->addIncoming(Load, Pred); + } + + PN->eraseFromParent(); + } + + ++NumAdjusted; + return true; +} + bool SROA::performPromotion(Function &F) { std::vector<AllocaInst*> Allocas; - DominatorTree &DT = getAnalysis<DominatorTree>(); - DominanceFrontier &DF = getAnalysis<DominanceFrontier>(); + DominatorTree *DT = 0; + if (HasDomTree) + DT = &getAnalysis<DominatorTree>(); BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function bool Changed = false; - + SmallVector<Instruction*, 64> Insts; while (1) { Allocas.clear(); @@ -795,12 +1141,27 @@ bool SROA::performPromotion(Function &F) { // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? - if (isAllocaPromotable(AI)) + if (tryToMakeAllocaBePromotable(AI, TD)) Allocas.push_back(AI); if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF); + if (HasDomTree) + PromoteMemToReg(Allocas, *DT); + else { + SSAUpdater SSA; + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { + AllocaInst *AI = Allocas[i]; + + // Build list of instructions to promote. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + Insts.push_back(cast<Instruction>(*UI)); + + AllocaPromoter(Insts, SSA).run(AI, Insts); + Insts.clear(); + } + } NumPromoted += Allocas.size(); Changed = true; } @@ -842,7 +1203,7 @@ bool SROA::performScalarRepl(Function &F) { while (!WorkList.empty()) { AllocaInst *AI = WorkList.back(); WorkList.pop_back(); - + // Handle dead allocas trivially. These can be formed by SROA'ing arrays // with unused elements. if (AI->use_empty()) { @@ -854,7 +1215,7 @@ bool SROA::performScalarRepl(Function &F) { // If this alloca is impossible for us to promote, reject it early. if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) continue; - + // Check to see if this allocation is only modified by a memcpy/memmove from // a constant global. If this is the case, we can change all users to use // the constant global instead. This is commonly produced by the CFE by @@ -871,7 +1232,7 @@ bool SROA::performScalarRepl(Function &F) { Changed = true; continue; } - + // Check to see if we can perform the core SROA transformation. We cannot // transform the allocation instruction if it is an array allocation // (allocations OF arrays are ok though), and an allocation of a scalar @@ -880,10 +1241,10 @@ bool SROA::performScalarRepl(Function &F) { // Do not promote [0 x %struct]. 
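[Editor's note: performPromotion above now picks between PromoteMemToReg and an SSAUpdater loop based on HasDomTree. A hedged sketch of driver code selecting a variant through the updated factory; only createScalarReplAggregatesPass's signature comes from this patch, the pipeline around it is assumed:]

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    void addSROA(FunctionPassManager &FPM, bool WantDomTree) {
      // UseDomTree=true builds SROA_DT (classic PromoteMemToReg); false builds
      // SROA_SSAUp, which promotes with SSAUpdater and needs no DominatorTree.
      FPM.add(createScalarReplAggregatesPass(-1, WantDomTree));
    }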
if (AllocaSize == 0) continue; - + // Do not promote any struct whose size is too big. if (AllocaSize > SRThreshold) continue; - + // If the alloca looks like a good candidate for scalar replacement, and if // all its users can be transformed, then split up the aggregate into its // separate elements. @@ -906,8 +1267,8 @@ bool SROA::performScalarRepl(Function &F) { ++NumConverted; Changed = true; continue; - } - + } + // Otherwise, couldn't process this alloca. } @@ -916,14 +1277,14 @@ bool SROA::performScalarRepl(Function &F) { /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl /// predicate, do SROA now. -void SROA::DoScalarReplacement(AllocaInst *AI, +void SROA::DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList) { DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); SmallVector<AllocaInst*, 32> ElementAllocas; if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, + AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, AI->getAlignment(), AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); @@ -971,48 +1332,106 @@ void SROA::DeleteDeadInstructions() { I->eraseFromParent(); } } - + /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to /// performing scalar replacement of alloca AI. The results are flagged in /// the Info parameter. Offset indicates the position within AI that is /// referenced by this instruction. -void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, +void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info) { for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { - isSafeForScalarRepl(BC, AI, Offset, Info); + isSafeForScalarRepl(BC, Offset, Info); } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { uint64_t GEPOffset = Offset; - isSafeGEP(GEPI, AI, GEPOffset, Info); + isSafeGEP(GEPI, GEPOffset, Info); if (!Info.isUnsafe) - isSafeForScalarRepl(GEPI, AI, GEPOffset, Info); + isSafeForScalarRepl(GEPI, GEPOffset, Info); } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); - if (Length) - isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == 0, Info); - else - MarkUnsafe(Info); + if (Length == 0) + return MarkUnsafe(Info, User); + isSafeMemAccess(Offset, Length->getZExtValue(), 0, + UI.getOperandNo() == 0, Info, MI, + true /*AllowWholeAccess*/); + } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + if (LI->isVolatile()) + return MarkUnsafe(Info, User); + const Type *LIType = LI->getType(); + isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), + LIType, false, Info, LI, true /*AllowWholeAccess*/); + Info.hasALoadOrStore = true; + + } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Store is ok if storing INTO the pointer, not storing the pointer + if (SI->isVolatile() || SI->getOperand(0) == I) + return MarkUnsafe(Info, User); + + const Type *SIType = SI->getOperand(0)->getType(); + isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), + SIType, true, Info, SI, true /*AllowWholeAccess*/); + Info.hasALoadOrStore = true; + } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { + 
isSafePHISelectUseForScalarRepl(User, Offset, Info);
+    } else {
+      return MarkUnsafe(Info, User);
+    }
+    if (Info.isUnsafe) return;
+  }
+}
+
+
+/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
+/// pointer derived from the alloca, we can often still split the alloca into
+/// elements.  This is useful if we have a large alloca where one element is
+/// phi'd together somewhere: we can SROA and promote all the other elements
+/// even if we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca.  The most important use case for this is single loads and stores
+/// that are PHI'd together, which can happen due to code sinking.
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+                                           AllocaInfo &Info) {
+  // If we've already checked this PHI, don't do it again.
+  if (PHINode *PN = dyn_cast<PHINode>(I))
+    if (!Info.CheckedPHIs.insert(PN))
+      return;
+
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+      isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+      // Only allow "bitcast" GEPs for simplicity.  We could generalize this,
+      // but would have to prove that we're staying inside of an element being
+      // promoted.
+      if (!GEPI->hasAllZeroIndices())
+        return MarkUnsafe(Info, User);
+      isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
     } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      if (!LI->isVolatile()) {
-        const Type *LIType = LI->getType();
-        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
-                        LIType, false, Info);
-      } else
-        MarkUnsafe(Info);
+      if (LI->isVolatile())
+        return MarkUnsafe(Info, User);
+      const Type *LIType = LI->getType();
+      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+                      LIType, false, Info, LI, false /*AllowWholeAccess*/);
+      Info.hasALoadOrStore = true;
+
     } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       // Store is ok if storing INTO the pointer, not storing the pointer
-      if (!SI->isVolatile() && SI->getOperand(0) != I) {
-        const Type *SIType = SI->getOperand(0)->getType();
-        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
-                        SIType, true, Info);
-      } else
-        MarkUnsafe(Info);
+      if (SI->isVolatile() || SI->getOperand(0) == I)
+        return MarkUnsafe(Info, User);
+
+      const Type *SIType = SI->getOperand(0)->getType();
+      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+                      SIType, true, Info, SI, false /*AllowWholeAccess*/);
+      Info.hasALoadOrStore = true;
+    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+      isSafePHISelectUseForScalarRepl(User, Offset, Info);
     } else {
-      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
-      MarkUnsafe(Info);
+      return MarkUnsafe(Info, User);
     }
     if (Info.isUnsafe) return;
   }
@@ -1023,7 +1442,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
 /// references, and when the resulting offset corresponds to an element within
 /// the alloca type.  The results are flagged in the Info parameter.  Upon
 /// return, Offset is adjusted as specified by the GEP indices.
-void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, +void SROA::isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info) { gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); if (GEPIt == E) @@ -1038,7 +1457,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); if (!IdxVal) - return MarkUnsafe(Info); + return MarkUnsafe(Info, GEPI); } // Compute the offset due to this GEP and check if the alloca has a @@ -1046,40 +1465,92 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), &Indices[0], Indices.size()); - if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) - MarkUnsafe(Info); + if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) + MarkUnsafe(Info, GEPI); +} + +/// isHomogeneousAggregate - Check if type T is a struct or array containing +/// elements of the same type (which is always true for arrays). If so, +/// return true with NumElts and EltTy set to the number of elements and the +/// element type, respectively. +static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, + const Type *&EltTy) { + if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { + NumElts = AT->getNumElements(); + EltTy = (NumElts == 0 ? 0 : AT->getElementType()); + return true; + } + if (const StructType *ST = dyn_cast<StructType>(T)) { + NumElts = ST->getNumContainedTypes(); + EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); + for (unsigned n = 1; n < NumElts; ++n) { + if (ST->getContainedType(n) != EltTy) + return false; + } + return true; + } + return false; +} + +/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are +/// "homogeneous" aggregates with the same element type and number of elements. +static bool isCompatibleAggregate(const Type *T1, const Type *T2) { + if (T1 == T2) + return true; + + unsigned NumElts1, NumElts2; + const Type *EltTy1, *EltTy2; + if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && + isHomogeneousAggregate(T2, NumElts2, EltTy2) && + NumElts1 == NumElts2 && + EltTy1 == EltTy2) + return true; + + return false; } /// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI /// alloca or has an offset and size that corresponds to a component element /// within it. The offset checked here may have been formed from a GEP with a /// pointer bitcasted to a different type. -void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, +/// +/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a +/// unit. If false, it only allows accesses known to be in a single element. +void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, const Type *MemOpType, bool isStore, - AllocaInfo &Info) { + AllocaInfo &Info, Instruction *TheAccess, + bool AllowWholeAccess) { // Check if this is a load/store of the entire alloca. - if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { - bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); - // This is safe for MemIntrinsics (where MemOpType is 0), integer types - // (which are essentially the same as the MemIntrinsics, especially with - // regard to copying padding between elements), or references using the - // aggregate type of the alloca. 
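[Editor's note: what isCompatibleAggregate above buys is that a homogeneous struct and an array with the same element type and count can stand in for one another when rewriting whole-aggregate accesses. A small C++ analogue, with the layout equivalence assumed:]

    struct Pair { float a, b; };   // like {float, float}: homogeneous, 2 x float
    typedef float Arr2[2];         // like [2 x float]: same element type, count

    // Matching element-for-element layout is what lets a whole-aggregate load
    // or store of one type against an alloca of the other be rewritten with
    // extractvalue/insertvalue instead of being marked unsafe.
    typedef char LayoutsAgree[sizeof(Pair) == sizeof(Arr2) ? 1 : -1];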
- if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) { - if (!UsesAggregateType) { - if (isStore) - Info.isMemCpyDst = true; - else - Info.isMemCpySrc = true; - } + if (Offset == 0 && AllowWholeAccess && + MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) { + // This can be safe for MemIntrinsics (where MemOpType is 0) and integer + // loads/stores (which are essentially the same as the MemIntrinsics with + // regard to copying padding between elements). But, if an alloca is + // flagged as both a source and destination of such operations, we'll need + // to check later for padding between elements. + if (!MemOpType || MemOpType->isIntegerTy()) { + if (isStore) + Info.isMemCpyDst = true; + else + Info.isMemCpySrc = true; + return; + } + // This is also safe for references using a type that is compatible with + // the type of the alloca, so that loads/stores can be rewritten using + // insertvalue/extractvalue. + if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) { + Info.hasSubelementAccess = true; return; } } // Check if the offset/size correspond to a component within the alloca type. - const Type *T = AI->getAllocatedType(); - if (TypeHasComponent(T, Offset, MemSize)) + const Type *T = Info.AI->getAllocatedType(); + if (TypeHasComponent(T, Offset, MemSize)) { + Info.hasSubelementAccess = true; return; + } - return MarkUnsafe(Info); + return MarkUnsafe(Info, TheAccess); } /// TypeHasComponent - Return true if T has a component type with the @@ -1116,14 +1587,21 @@ bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { /// instruction. void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts) { - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { - Instruction *User = cast<Instruction>(*UI); + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI++); if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { RewriteBitCast(BC, AI, Offset, NewElts); - } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { + continue; + } + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { RewriteGEP(GEPI, AI, Offset, NewElts); - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { + continue; + } + + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); uint64_t MemSize = Length->getZExtValue(); if (Offset == 0 && @@ -1131,9 +1609,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); // Otherwise the intrinsic can only touch a single element and the // address operand will be updated, so nothing else needs to be done. - } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + continue; + } + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { const Type *LIType = LI->getType(); - if (LIType == AI->getAllocatedType()) { + + if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { // Replace: // %res = load { i32, i32 }* %alloc // with: @@ -1155,10 +1637,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // If this is a load of the entire alloca to an integer, rewrite it. 
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); } - } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { Value *Val = SI->getOperand(0); const Type *SIType = Val->getType(); - if (SIType == AI->getAllocatedType()) { + if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { // Replace: // store { i32, i32 } %val, { i32, i32 }* %alloc // with: @@ -1178,6 +1663,26 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // If this is a store of the entire alloca from an integer, rewrite it. RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); } + continue; + } + + if (isa<SelectInst>(User) || isa<PHINode>(User)) { + // If we have a PHI user of the alloca itself (as opposed to a GEP or + // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to + // the new pointer. + if (!isa<AllocaInst>(I)) continue; + + assert(Offset == 0 && NewElts[0] && + "Direct alloca use should have a zero offset"); + + // If we have a use of the alloca, we know the derived uses will be + // utilizing just the first element of the scalarized result. Insert a + // bitcast of the first alloca before the user as required. + AllocaInst *NewAI = NewElts[0]; + BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI); + NewAI->moveBefore(BCI); + TheUse = BCI; + continue; } } } @@ -1305,7 +1810,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // function is only called for mem intrinsics that access the whole // aggregate, so non-zero GEPs are not an issue here.) OtherPtr = OtherPtr->stripPointerCasts(); - + // Copying the alloca to itself is a no-op: just delete it. if (OtherPtr == AI || OtherPtr == NewElts[0]) { // This code will run twice for a no-op memcpy -- once for each operand. @@ -1316,28 +1821,26 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, DeadInsts.push_back(MI); return; } - + // If the pointer is not the right type, insert a bitcast to the right // type. const Type *NewTy = PointerType::get(AI->getType()->getElementType(), AddrSpace); - + if (OtherPtr->getType() != NewTy) OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI); } - + // Process each element of the aggregate. - Value *TheFn = MI->getCalledValue(); - const Type *BytePtrTy = MI->getRawDest()->getType(); bool SROADest = MI->getRawDest() == Inst; - + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. Value *OtherElt = 0; unsigned OtherEltAlign = MemAlignment; - + if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; @@ -1353,7 +1856,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); EltOffset = TD->getTypeAllocSize(EltTy)*i; } - + // The alignment of the other pointer is the guaranteed alignment of the // element, which is affected by both the known alignment of the whole // mem intrinsic and the alignment of the element. If the alignment of @@ -1361,10 +1864,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // known alignment is just 4 bytes. 
OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); } - + Value *EltPtr = NewElts[i]; const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); - + // If we got down to a scalar, insert a load or store as appropriate. if (EltTy->isSingleValueType()) { if (isa<MemTransferInst>(MI)) { @@ -1380,7 +1883,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, continue; } assert(isa<MemSetInst>(MI)); - + // If the stored element is zero (common case), just store a null // constant. Constant *StoreVal; @@ -1400,7 +1903,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, TotalVal = TotalVal.shl(8); TotalVal |= OneVal; } - + // Convert the integer value to the appropriate type. StoreVal = ConstantInt::get(CI->getContext(), TotalVal); if (ValTy->isPointerTy()) @@ -1408,12 +1911,12 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, else if (ValTy->isFloatingPointTy()) StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); - + // If the requested value was a vector constant, create it. if (EltTy != ValTy) { unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); SmallVector<Constant*, 16> Elts(NumElts, StoreVal); - StoreVal = ConstantVector::get(&Elts[0], NumElts); + StoreVal = ConstantVector::get(Elts); } } new StoreInst(StoreVal, EltPtr, MI); @@ -1422,55 +1925,24 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // Otherwise, if we're storing a byte variable, use a memset call for // this element. } - - // Cast the element pointer to BytePtrTy. - if (EltPtr->getType() != BytePtrTy) - EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI); - - // Cast the other pointer (if we have one) to BytePtrTy. - if (OtherElt && OtherElt->getType() != BytePtrTy) { - // Preserve address space of OtherElt - const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType()); - const PointerType* PTy = cast<PointerType>(BytePtrTy); - if (OtherPTy->getElementType() != PTy->getElementType()) { - Type *NewOtherPTy = PointerType::get(PTy->getElementType(), - OtherPTy->getAddressSpace()); - OtherElt = new BitCastInst(OtherElt, NewOtherPTy, - OtherElt->getNameStr(), MI); - } - } - + unsigned EltSize = TD->getTypeAllocSize(EltTy); - + + IRBuilder<> Builder(MI); + // Finally, insert the meminst for this element. - if (isa<MemTransferInst>(MI)) { - Value *Ops[] = { - SROADest ? EltPtr : OtherElt, // Dest ptr - SROADest ? OtherElt : EltPtr, // Src ptr - ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size - // Align - ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign), - MI->getVolatileCst() - }; - // In case we fold the address space overloaded memcpy of A to B - // with memcpy of B to C, change the function to be a memcpy of A to C. 
- const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(), - Ops[2]->getType() }; - Module *M = MI->getParent()->getParent()->getParent(); - TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3); - CallInst::Create(TheFn, Ops, Ops + 5, "", MI); + if (isa<MemSetInst>(MI)) { + Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize, + MI->isVolatile()); } else { - assert(isa<MemSetInst>(MI)); - Value *Ops[] = { - EltPtr, MI->getArgOperand(1), // Dest, Value, - ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size - Zero, // Align - ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile - }; - const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; - Module *M = MI->getParent()->getParent()->getParent(); - TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); - CallInst::Create(TheFn, Ops, Ops + 5, "", MI); + assert(isa<MemTransferInst>(MI)); + Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr + Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr + + if (isa<MemCpyInst>(MI)) + Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile()); + else + Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile()); } } DeadInsts.push_back(MI); @@ -1486,12 +1958,13 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *SrcVal = SI->getOperand(0); const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + + IRBuilder<> Builder(SI); // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) - SrcVal = new ZExtInst(SrcVal, - IntegerType::get(SI->getContext(), AllocaSizeBits), - "", SI); + SrcVal = Builder.CreateZExt(SrcVal, + IntegerType::get(SI->getContext(), AllocaSizeBits)); DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI << '\n'); @@ -1500,47 +1973,44 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // have different ways to compute the element offset. if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { const StructLayout *Layout = TD->getStructLayout(EltSTy); - + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Get the number of bits to shift SrcVal to get the value. const Type *FieldTy = EltSTy->getElementType(i); uint64_t Shift = Layout->getElementOffsetInBits(i); - + if (TD->isBigEndian()) Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy); - + Value *EltVal = SrcVal; if (Shift) { Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); - EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, - "sroa.store.elt", SI); + EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); } - + // Truncate down to an integer of the right size. uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); - + // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - + if (FieldSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, - IntegerType::get(SI->getContext(), FieldSizeBits), - "", SI); + EltVal = Builder.CreateTrunc(EltVal, + IntegerType::get(SI->getContext(), FieldSizeBits)); Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). 
- EltVal = new BitCastInst(EltVal, FieldTy, "", SI); + EltVal = Builder.CreateBitCast(EltVal, FieldTy); } else { // Otherwise, bitcast the dest pointer (for aggregates). - DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), - "", SI); + DestField = Builder.CreateBitCast(DestField, + PointerType::getUnqual(EltVal->getType())); } new StoreInst(EltVal, DestField, SI); } - + } else { const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); const Type *ArrayEltTy = ATy->getElementType(); @@ -1548,50 +2018,48 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); uint64_t Shift; - + if (TD->isBigEndian()) Shift = AllocaSizeBits-ElementOffset; - else + else Shift = 0; - + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Ignore zero sized fields like {}, they obviously contain no data. if (ElementSizeBits == 0) continue; - + Value *EltVal = SrcVal; if (Shift) { Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); - EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, - "sroa.store.elt", SI); + EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); } - + // Truncate down to an integer of the right size. if (ElementSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, - IntegerType::get(SI->getContext(), - ElementSizeBits),"",SI); + EltVal = Builder.CreateTrunc(EltVal, + IntegerType::get(SI->getContext(), + ElementSizeBits)); Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. } else if (ArrayEltTy->isFloatingPointTy() || ArrayEltTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). - EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); + EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy); } else { // Otherwise, bitcast the dest pointer (for aggregates). - DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), - "", SI); + DestField = Builder.CreateBitCast(DestField, + PointerType::getUnqual(EltVal->getType())); } new StoreInst(EltVal, DestField, SI); - + if (TD->isBigEndian()) Shift -= ElementOffset; - else + else Shift += ElementOffset; } } - + DeadInsts.push_back(SI); } @@ -1603,10 +2071,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // and form the result value. const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - + DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); - + // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. const StructLayout *Layout = 0; @@ -1616,11 +2084,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, } else { const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); - } - - Value *ResultVal = + } + + Value *ResultVal = Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); - + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Load the value from the alloca. If the NewElt is an aggregate, cast // the pointer to an integer of the same size before doing the load. 
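The load rewrite that continues below is the inverse of the store path just shown: each new element is loaded, zero-extended, shifted to its field offset, and OR'd into an accumulator, with the shift mirrored on big-endian targets. A hedged sketch of just that arithmetic for a hypothetical { i32, i32 } alloca loaded as an i64 (offsets are hard-coded here; the pass reads them from StructLayout):

#include <cstdint>
#include <cstdio>

static uint64_t reassemble(uint32_t F0, uint32_t F1, bool BigEndian) {
  const unsigned AllocaBits = 64, FieldBits = 32;
  unsigned Off0 = 0, Off1 = 32;   // getElementOffsetInBits(0) and (1)
  if (BigEndian) {                // Shift = AllocaSizeBits-Shift-Width
    Off0 = AllocaBits - 0 - FieldBits;
    Off1 = AllocaBits - 32 - FieldBits;
  }
  uint64_t R = 0;                 // starts as the null value
  R |= (uint64_t)F0 << Off0;      // zext + shl + or, as in the pass
  R |= (uint64_t)F1 << Off1;
  return R;
}

int main() {
  std::printf("LE: %016llx\n",
              (unsigned long long)reassemble(0x11, 0x22, false));
  std::printf("BE: %016llx\n",
              (unsigned long long)reassemble(0x11, 0x22, true));
  return 0;
}

The store path above performs the same offsets in reverse: lshr by the field offset, then trunc to the field's width.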
@@ -1628,11 +2096,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, const Type *FieldTy = cast<PointerType>(SrcField->getType())->getElementType(); uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); - + // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - - const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + + const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && !FieldTy->isVectorTy()) @@ -1650,17 +2118,17 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // we can shift and insert it. if (SrcField->getType() != ResultVal->getType()) SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); - + // Determine the number of bits to shift SrcField. uint64_t Shift; if (Layout) // Struct case. Shift = Layout->getElementOffsetInBits(i); else // Array case. Shift = i*ArrayEltBitOffset; - + if (TD->isBigEndian()) Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); - + if (Shift) { Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); @@ -1683,46 +2151,39 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, } /// HasPadding - Return true if the specified type has any structure or -/// alignment padding, false otherwise. +/// alignment padding in between the elements that would be split apart +/// by SROA; return false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) - return HasPadding(ATy->getElementType(), TD); - - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) - return HasPadding(VTy->getElementType(), TD); - - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TD.getStructLayout(STy); - unsigned PrevFieldBitOffset = 0; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - unsigned FieldBitOffset = SL->getElementOffsetInBits(i); - - // Padding in sub-elements? - if (HasPadding(STy->getElementType(i), TD)) - return true; + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Ty = ATy->getElementType(); + return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); + } - // Check to see if there is any padding between this element and the - // previous one. - if (i) { - unsigned PrevFieldEnd = + // SROA currently handles only Arrays and Structs. + const StructType *STy = cast<StructType>(Ty); + const StructLayout *SL = TD.getStructLayout(STy); + unsigned PrevFieldBitOffset = 0; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + unsigned FieldBitOffset = SL->getElementOffsetInBits(i); + + // Check to see if there is any padding between this element and the + // previous one. + if (i) { + unsigned PrevFieldEnd = PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1)); - if (PrevFieldEnd < FieldBitOffset) - return true; - } - - PrevFieldBitOffset = FieldBitOffset; - } - - // Check for tail padding. - if (unsigned EltCount = STy->getNumElements()) { - unsigned PrevFieldEnd = PrevFieldBitOffset + - TD.getTypeSizeInBits(STy->getElementType(EltCount-1)); - if (PrevFieldEnd < SL->getSizeInBits()) + if (PrevFieldEnd < FieldBitOffset) return true; } + PrevFieldBitOffset = FieldBitOffset; } - - return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); + // Check for tail padding. 
+ if (unsigned EltCount = STy->getNumElements()) { + unsigned PrevFieldEnd = PrevFieldBitOffset + + TD.getTypeSizeInBits(STy->getElementType(EltCount-1)); + if (PrevFieldEnd < SL->getSizeInBits()) + return true; + } + return false; } /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of @@ -1731,14 +2192,14 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // Loop over the use list of the alloca. We can only transform it if all of // the users are safe to transform. - AllocaInfo Info; - - isSafeForScalarRepl(AI, AI, 0, Info); + AllocaInfo Info(AI); + + isSafeForScalarRepl(AI, 0, Info); if (Info.isUnsafe) { DEBUG(dbgs() << "Cannot transform: " << *AI << '\n'); return false; } - + // Okay, we know all the users are promotable. If the aggregate is a memcpy // source and destination, we have to be careful. In particular, the memcpy // could be moving around elements that live in structure padding of the LLVM @@ -1748,6 +2209,20 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { HasPadding(AI->getAllocatedType(), *TD)) return false; + // If the alloca never has an access to just *part* of it, but is accessed + // via loads and stores, then we should use ConvertToScalarInfo to promote + // the alloca instead of promoting each piece at a time and inserting fission + // and fusion code. + if (!Info.hasSubelementAccess && Info.hasALoadOrStore) { + // If the struct/array just has one element, use basic SRoA. + if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (ST->getNumElements() > 1) return false; + } else { + if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1) + return false; + } + } + return true; } @@ -1760,7 +2235,7 @@ static bool PointsToConstantGlobal(Value *V) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return GV->isConstant(); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || + if (CE->getOpcode() == Instruction::BitCast || CE->getOpcode() == Instruction::GetElementPtr) return PointsToConstantGlobal(CE->getOperand(0)); return false; @@ -1771,18 +2246,19 @@ static bool PointsToConstantGlobal(Value *V) { /// see any stores or other unknown uses. If we see pointer arithmetic, keep /// track of whether it moves the pointer (with isOffset) but otherwise traverse /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to -/// the alloca, and if the source pointer is a pointer to a constant global, we +/// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, bool isOffset) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { User *U = cast<Instruction>(*UI); - if (LoadInst *LI = dyn_cast<LoadInst>(U)) + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { // Ignore non-volatile loads, they are always ok. - if (!LI->isVolatile()) - continue; - + if (LI->isVolatile()) return false; + continue; + } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { // If uses of the bitcast are ok, we are ok. if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) @@ -1797,27 +2273,52 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, return false; continue; } - + + if (CallSite CS = U) { + // If this is a readonly/readnone call site, then we know it is just a + // load and we can ignore it. 
+ if (CS.onlyReadsMemory())
+ continue;
+
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ continue;
+ }
+
// If this isn't our memcpy/memmove, reject it as something we can't
// handle.
MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
if (MI == 0)
return false;
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
// If we already have seen a copy, reject the second one.
if (TheCopy) return false;
-
+
// If the pointer has been offset from the start of the alloca, we can't
// safely handle this.
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
if (UI.getOperandNo() != 0) return false;
-
+
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource())) return false;
-
+
// Otherwise, the transform is safe. Remember the copy instruction.
TheCopy = MI;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 360749c..ce5dd73 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,7 +42,9 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(ID) {}
+ CFGSimplifyPass() : FunctionPass(ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
};
@@ -50,7 +52,7 @@ namespace {
char CFGSimplifyPass::ID = 0;
INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
- "Simplify the CFG", false, false);
+ "Simplify the CFG", false, false)
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 3ec70ec..70ff32e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -32,7 +32,9 @@ namespace {
const TargetData *TD;
public:
static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(ID) {}
+ SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
+ initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -47,7 +49,7 @@ namespace {
} // end anonymous namespace.
INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
- "Simplify half_powr library calls", false, false);
+ "Simplify half_powr library calls", false, false)
// Public interface to the Simplify HalfPowr LibCalls pass.
FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { @@ -95,7 +97,8 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, InlineFunctionInfo IFI(0, TD); bool B = InlineFunction(Call, IFI); - assert(B && "half_powr didn't inline?"); B=B; + assert(B && "half_powr didn't inline?"); + (void)B; BasicBlock *NewBody = NewBlock->getSinglePredecessor(); assert(NewBody); diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp index d7ce53f..ec45b71 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -123,7 +123,7 @@ struct StrCatOpt : public LibCallOptimization { // Verify the "strcat" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != Type::getInt8PtrTy(*Context) || + FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) return 0; @@ -160,9 +160,8 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - EmitMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len+1), - 1, false, B, TD); + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); } }; @@ -174,7 +173,7 @@ struct StrNCatOpt : public StrCatOpt { // Verify the "strncat" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - FT->getReturnType() != Type::getInt8PtrTy(*Context) || + FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || !FT->getParamType(2)->isIntegerTy()) @@ -222,8 +221,9 @@ struct StrChrOpt : public LibCallOptimization { // Verify the "strchr" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != Type::getInt8PtrTy(*Context) || - FT->getParamType(0) != FT->getReturnType()) + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) return 0; Value *SrcStr = CI->getArgOperand(0); @@ -252,22 +252,55 @@ struct StrChrOpt : public LibCallOptimization { // strchr can find the nul character. Str += '\0'; - char CharValue = CharC->getSExtValue(); // Compute the offset. - uint64_t i = 0; - while (1) { - if (i == Str.size()) // Didn't find the char. strchr returns null. - return Constant::getNullValue(CI->getType()); - // Did we find our match? - if (Str[i] == CharValue) - break; - ++i; - } + size_t I = Str.find(CharC->getSExtValue()); + if (I == std::string::npos) // Didn't find the char. strchr returns null. + return Constant::getNullValue(CI->getType()); // strchr(s+n,c) -> gep(s+n+i,c) - Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i); - return B.CreateGEP(SrcStr, Idx, "strchr"); + return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); + } +}; + +//===---------------------------------------===// +// 'strrchr' Optimizations + +struct StrRChrOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strrchr" function prototype. 
+ const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + + // Cannot fold anything if we're not looking for a constant. + if (!CharC) + return 0; + + std::string Str; + if (!GetConstantStringInfo(SrcStr, Str)) { + // strrchr(s, 0) -> strchr(s, 0) + if (TD && CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TD); + return 0; + } + + // strrchr can find the nul character. + Str += '\0'; + + // Compute the offset. + size_t I = Str.rfind(CharC->getSExtValue()); + if (I == std::string::npos) // Didn't find the char. Return null. + return Constant::getNullValue(CI->getType()); + + // strrchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); } }; @@ -281,7 +314,7 @@ struct StrCmpOpt : public LibCallOptimization { if (FT->getNumParams() != 2 || !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Type::getInt8PtrTy(*Context)) + FT->getParamType(0) != B.getInt8PtrTy()) return 0; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); @@ -329,7 +362,7 @@ struct StrNCmpOpt : public LibCallOptimization { if (FT->getNumParams() != 3 || !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Type::getInt8PtrTy(*Context) || + FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) return 0; @@ -384,7 +417,7 @@ struct StrCpyOpt : public LibCallOptimization { if (FT->getNumParams() != NumParams || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Type::getInt8PtrTy(*Context)) + FT->getParamType(0) != B.getInt8PtrTy()) return 0; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); @@ -405,9 +438,8 @@ struct StrCpyOpt : public LibCallOptimization { ConstantInt::get(TD->getIntPtrType(*Context), Len), CI->getArgOperand(2), B, TD); else - EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), - 1, false, B, TD); + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); return Dst; } }; @@ -420,7 +452,7 @@ struct StrNCpyOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != Type::getInt8PtrTy(*Context) || + FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) return 0; @@ -435,8 +467,7 @@ struct StrNCpyOpt : public LibCallOptimization { if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) - EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), - LenOp, false, B, TD); + B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); return Dst; } @@ -455,9 +486,8 @@ struct StrNCpyOpt : public LibCallOptimization { if (Len > SrcLen+1) return 0; // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), - 1, false, B, TD); + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); return Dst; } @@ -470,7 +500,7 @@ struct StrLenOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, 
CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || - FT->getParamType(0) != Type::getInt8PtrTy(*Context) || + FT->getParamType(0) != B.getInt8PtrTy() || !FT->getReturnType()->isIntegerTy()) return 0; @@ -488,6 +518,45 @@ struct StrLenOpt : public LibCallOptimization { } }; + +//===---------------------------------------===// +// 'strpbrk' Optimizations + +struct StrPBrkOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + FT->getReturnType() != FT->getParamType(0)) + return 0; + + std::string S1, S2; + bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2); + + // strpbrk(s, "") -> NULL + // strpbrk("", s) -> NULL + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t I = S1.find_first_of(S2); + if (I == std::string::npos) // No match. + return Constant::getNullValue(CI->getType()); + + return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + } + + // strpbrk(s, "a") -> strchr(s, 'a') + if (TD && HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD); + + return 0; + } +}; + //===---------------------------------------===// // 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc. @@ -501,7 +570,8 @@ struct StrToOpt : public LibCallOptimization { Value *EndPtr = CI->getArgOperand(1); if (isa<ConstantPointerNull>(EndPtr)) { - CI->setOnlyReadsMemory(); + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. CI->addAttribute(1, Attribute::NoCapture); } @@ -510,6 +580,67 @@ struct StrToOpt : public LibCallOptimization { }; //===---------------------------------------===// +// 'strspn' Optimizations + +struct StrSpnOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + std::string S1, S2; + bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2); + + // strspn(s, "") -> 0 + // strspn("", s) -> 0 + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. 
+ if (HasS1 && HasS2) + return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str())); + + return 0; + } +}; + +//===---------------------------------------===// +// 'strcspn' Optimizations + +struct StrCSpnOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + std::string S1, S2; + bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) + return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str())); + + // strcspn(s, "") -> strlen(s) + if (TD && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, TD); + + return 0; + } +}; + +//===---------------------------------------===// // 'strstr' Optimizations struct StrStrOpt : public LibCallOptimization { @@ -637,8 +768,8 @@ struct MemCpyOpt : public LibCallOptimization { return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1, false, B, TD); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); return CI->getArgOperand(0); } }; @@ -659,8 +790,8 @@ struct MemMoveOpt : public LibCallOptimization { return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1, false, B, TD); + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); return CI->getArgOperand(0); } }; @@ -681,9 +812,8 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), - Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); return CI->getArgOperand(0); } }; @@ -765,12 +895,10 @@ struct Exp2Opt : public LibCallOptimization { Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), - Type::getInt32Ty(*Context), "tmp"); + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp"); } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), - Type::getInt32Ty(*Context), "tmp"); + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp"); } if (LdExpArg) { @@ -789,7 +917,7 @@ struct Exp2Opt : public LibCallOptimization { Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), - Type::getInt32Ty(*Context),NULL); + B.getInt32Ty(), NULL); CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -819,7 +947,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { Value *V = Cast->getOperand(0); V = 
EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
Callee->getAttributes());
- return B.CreateFPExt(V, Type::getDoubleTy(*Context));
+ return B.CreateFPExt(V, B.getDoubleTy());
}
};
@@ -846,8 +974,8 @@ struct FFSOpt : public LibCallOptimization {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
if (CI->getValue() == 0) // ffs(0) -> 0.
return Constant::getNullValue(CI->getType());
- return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
- CI->getValue().countTrailingZeros()+1);
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
}
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
@@ -856,11 +984,10 @@ struct FFSOpt : public LibCallOptimization {
Intrinsic::cttz, &ArgType, 1);
Value *V = B.CreateCall(F, Op, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
- V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+ V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
- return B.CreateSelect(Cond, V,
- ConstantInt::get(Type::getInt32Ty(*Context), 0));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
}
};
@@ -877,10 +1004,8 @@ struct IsDigitOpt : public LibCallOptimization {
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getArgOperand(0);
- Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
- "isdigittmp");
- Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
- "isdigit");
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
return B.CreateZExt(Op, CI->getType());
}
};
@@ -898,8 +1023,7 @@ struct IsAsciiOpt : public LibCallOptimization {
// isascii(c) -> c <u 128
Value *Op = CI->getArgOperand(0);
- Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
- "isascii");
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
return B.CreateZExt(Op, CI->getType());
}
};
@@ -917,8 +1041,7 @@ struct AbsOpt : public LibCallOptimization {
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getArgOperand(0);
- Value *Pos = B.CreateICmpSGT(Op,
- Constant::getAllOnesValue(Op->getType()),
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
"ispos");
Value *Neg = B.CreateNeg(Op, "neg");
return B.CreateSelect(Pos, Op, Neg);
@@ -969,11 +1092,15 @@ struct PrintFOpt : public LibCallOptimization {
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), 0);
- // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
- // in case there is an error writing to stdout.
+ // Do not do any of the following transformations if the printf return value
+ // is used; in general, the printf return value is not compatible with
+ // either putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), - FormatStr[0]), B, TD); + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1004,8 +1131,7 @@ struct PrintFOpt : public LibCallOptimization { // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && - CI->getArgOperand(1)->getType()->isPointerTy() && - CI->use_empty()) { + CI->getArgOperand(1)->getType()->isPointerTy()) { EmitPutS(CI->getArgOperand(1), B, TD); return CI; } @@ -1042,9 +1168,9 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the - ConstantInt::get(TD->getIntPtrType(*Context), // nul byte. - FormatStr.size() + 1), 1, false, B, TD); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + FormatStr.size() + 1), 1); // nul byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1058,13 +1184,11 @@ struct SPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *V = B.CreateTrunc(CI->getArgOperand(2), - Type::getInt8Ty(*Context), "char"); + Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), - "nul"); - B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr); + Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); return ConstantInt::get(CI->getType(), 1); } @@ -1080,8 +1204,7 @@ struct SPrintFOpt : public LibCallOptimization { Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), - IncLen, 1, false, B, TD); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1208,6 +1331,34 @@ struct FPrintFOpt : public LibCallOptimization { } }; +//===---------------------------------------===// +// 'puts' Optimizations + +struct PutsOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + // Check for a constant string. + std::string Str; + if (!GetConstantStringInfo(CI->getArgOperand(0), Str)) + return 0; + + if (Str.empty() && CI->use_empty()) { + // puts("") -> putchar('\n') + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD); + if (CI->use_empty()) return CI; + return B.CreateIntCast(Res, CI->getType(), true); + } + + return 0; + } +}; + } // end anonymous namespace. 
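The new constant folders added above (strrchr, strpbrk, strspn, strcspn) all follow the strchr pattern: recover the haystack with GetConstantStringInfo, compute a byte offset with a std::string search, and emit either a GEP at that offset or a null result. A standalone sketch of that shared offset computation (the fold* helpers are hypothetical names, not part of the pass):

#include <cstdio>
#include <string>

// strchr/strrchr: npos means "fold to a null pointer".
static size_t foldStrChr(std::string Str, char C, bool Reverse) {
  Str += '\0';                    // the searches can find the terminator
  return Reverse ? Str.rfind(C) : Str.find(C);
}

// strpbrk on two known strings.
static size_t foldStrPBrk(const std::string &S1, const std::string &S2) {
  return S1.find_first_of(S2);
}

int main() {
  std::printf("strchr('l'):  %zu\n", foldStrChr("hello", 'l', false)); // 2
  std::printf("strrchr('l'): %zu\n", foldStrChr("hello", 'l', true));  // 3
  size_t I = foldStrPBrk("hello", "xyl");
  std::printf("strpbrk:      %s\n",
              I == std::string::npos ? "null" : "gep");                // gep
  return 0;
}

strspn and strcspn need no offset at all; as the hunks above show, when both arguments are constant they fold straight to the integer the host strspn/strcspn returns.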
//===----------------------------------------------------------------------===// @@ -1220,10 +1371,10 @@ namespace { class SimplifyLibCalls : public FunctionPass { StringMap<LibCallOptimization*> Optimizations; // String and Memory LibCall Optimizations - StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp; - StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk; - StrNCpyOpt StrNCpy; StrLenOpt StrLen; - StrToOpt StrTo; StrStrOpt StrStr; + StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr; + StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk; + StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk; + StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; @@ -1233,11 +1384,14 @@ namespace { // Formatting and IO Optimizations SPrintFOpt SPrintF; PrintFOpt PrintF; FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; + PutsOpt Puts; bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {} + SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) { + initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); + } void InitOptimizations(); bool runOnFunction(Function &F); @@ -1255,7 +1409,7 @@ namespace { } // end anonymous namespace. INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls", - "Simplify well-known library calls", false, false); + "Simplify well-known library calls", false, false) // Public interface to the Simplify LibCalls pass. FunctionPass *llvm::createSimplifyLibCallsPass() { @@ -1269,11 +1423,13 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["strcat"] = &StrCat; Optimizations["strncat"] = &StrNCat; Optimizations["strchr"] = &StrChr; + Optimizations["strrchr"] = &StrRChr; Optimizations["strcmp"] = &StrCmp; Optimizations["strncmp"] = &StrNCmp; Optimizations["strcpy"] = &StrCpy; Optimizations["strncpy"] = &StrNCpy; Optimizations["strlen"] = &StrLen; + Optimizations["strpbrk"] = &StrPBrk; Optimizations["strtol"] = &StrTo; Optimizations["strtod"] = &StrTo; Optimizations["strtof"] = &StrTo; @@ -1281,6 +1437,8 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["strtoll"] = &StrTo; Optimizations["strtold"] = &StrTo; Optimizations["strtoull"] = &StrTo; + Optimizations["strspn"] = &StrSpn; + Optimizations["strcspn"] = &StrCSpn; Optimizations["strstr"] = &StrStr; Optimizations["memcmp"] = &MemCmp; Optimizations["memcpy"] = &MemCpy; @@ -1341,6 +1499,7 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["fwrite"] = &FWrite; Optimizations["fputs"] = &FPuts; Optimizations["fprintf"] = &FPrintF; + Optimizations["puts"] = &Puts; } @@ -2155,9 +2314,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // * pow(sqrt(x),y) -> pow(x,y*0.5) // * pow(pow(x,y),z)-> pow(x,y*z) // -// puts: -// * puts("") -> putchar('\n') -// // round, roundf, roundl: // * round(cnst) -> cnst' // @@ -2173,24 +2329,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // stpcpy: // * stpcpy(str, "literal") -> // llvm.memcpy(str,"literal",strlen("literal")+1,1) -// strrchr: -// * strrchr(s,c) -> reverse_offset_of_in(c,s) -// (if c is a constant integer and s is a constant string) -// * strrchr(s1,0) -> strchr(s1,0) -// -// strpbrk: -// * strpbrk(s,a) -> offset_in_for(s,a) -// (if s and 
a are both constant strings) -// * strpbrk(s,"") -> 0 -// * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1) -// -// strspn, strcspn: -// * strspn(s,a) -> const_int (if both args are constant) -// * strspn("",a) -> 0 -// * strspn(s,"") -> 0 -// * strcspn(s,a) -> const_int (if both args are constant) -// * strcspn("",a) -> 0 -// * strcspn(s,"") -> strlen(a) // // tan, tanf, tanl: // * tan(atan(x)) -> x diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp index 95d3ded..705f442 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp @@ -35,7 +35,9 @@ namespace { public: static char ID; // Pass identification - Sinking() : FunctionPass(ID) {} + Sinking() : FunctionPass(ID) { + initializeSinkingPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); @@ -56,7 +58,11 @@ namespace { } // end anonymous namespace char Sinking::ID = 0; -INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false); +INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false) FunctionPass *llvm::createSinkingPass() { return new Sinking(); } @@ -150,11 +156,10 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { if (L->isVolatile()) return false; - Value *Ptr = L->getPointerOperand(); - unsigned Size = AA->getTypeStoreSize(L->getType()); + AliasAnalysis::Location Loc = AA->getLocation(L); for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(), E = Stores.end(); I != E; ++I) - if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod) + if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod) return false; } @@ -163,7 +168,10 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, return false; } - return Inst->isSafeToSpeculativelyExecute(); + if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst)) + return false; + + return true; } /// SinkInstruction - Determine whether it is safe to sink the specified machine diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp index 2e437ac..9dd83c0 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp @@ -26,14 +26,14 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Type.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/CFG.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Transforms/Utils/Local.h" #include <map> using namespace llvm; @@ -49,7 +49,9 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - TailDup() : FunctionPass(ID) {} + TailDup() : FunctionPass(ID) { + initializeTailDupPass(*PassRegistry::getPassRegistry()); + } private: inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned); @@ -59,7 +61,7 @@ namespace { } char TailDup::ID = 0; -INITIALIZE_PASS(TailDup, "tailduplicate", "Tail 
Duplication", false, false); +INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false) // Public interface to the Tail Duplication pass FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } @@ -360,8 +362,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { Instruction *Inst = BI++; if (isInstructionTriviallyDead(Inst)) Inst->eraseFromParent(); - else if (Constant *C = ConstantFoldInstruction(Inst)) { - Inst->replaceAllUsesWith(C); + else if (Value *V = SimplifyInstruction(Inst)) { + Inst->replaceAllUsesWith(V); Inst->eraseFromParent(); } } diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 3717254..5b6bc04 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -52,31 +52,52 @@ #define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumEliminated, "Number of tail calls removed"); +STATISTIC(NumRetDuped, "Number of return duplicated"); STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { static char ID; // Pass identification, replacement for typeid - TailCallElim() : FunctionPass(ID) {} + TailCallElim() : FunctionPass(ID) { + initializeTailCallElimPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); private: + CallInst *FindTRECandidate(Instruction *I, + bool CannotTailCallElimCallsMarkedTail); + bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, + BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail); + bool FoldReturnAndProcessPred(BasicBlock *BB, + ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail); bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVector<PHINode*, 8> &ArgumentPHIs, @@ -88,7 +109,7 @@ namespace { char TailCallElim::ID = 0; INITIALIZE_PASS(TailCallElim, "tailcallelim", - "Tail Call Elimination", false, false); + "Tail Call Elimination", false, false) // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { @@ -133,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) { bool TailCallsAreMarkedTail = false; SmallVector<PHINode*, 8> ArgumentPHIs; bool MadeChange = false; - bool FunctionContainsEscapingAllocas = false; // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls @@ -160,10 +180,17 @@ bool TailCallElim::runOnFunction(Function &F) { return false; // Second pass, change any tail calls to loops. 
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) - MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { + bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,CannotTCETailMarkedCall); + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + CannotTCETailMarkedCall); + MadeChange |= Change; + } + } // If we eliminated any tail recursions, it's possible that we inserted some // silly PHI nodes which just merge an initial value (the incoming operand) @@ -175,7 +202,7 @@ bool TailCallElim::runOnFunction(Function &F) { PHINode *PN = ArgumentPHIs[i]; // If the PHI Node is a dynamic constant, replace it with the value it is. - if (Value *PNV = PN->hasConstantValue()) { + if (Value *PNV = SimplifyInstruction(PN)) { PN->replaceAllUsesWith(PNV); PN->eraseFromParent(); } @@ -322,41 +349,47 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI); } -bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVector<PHINode*, 8> &ArgumentPHIs, - bool CannotTailCallElimCallsMarkedTail) { - BasicBlock *BB = Ret->getParent(); +static Instruction *FirstNonDbg(BasicBlock::iterator I) { + while (isa<DbgInfoIntrinsic>(I)) + ++I; + return &*I; +} + +CallInst* +TailCallElim::FindTRECandidate(Instruction *TI, + bool CannotTailCallElimCallsMarkedTail) { + BasicBlock *BB = TI->getParent(); Function *F = BB->getParent(); - if (&BB->front() == Ret) // Make sure there is something before the ret... - return false; + if (&BB->front() == TI) // Make sure there is something before the terminator. + return 0; // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. - CallInst *CI; - BasicBlock::iterator BBI = Ret; - while (1) { + CallInst *CI = 0; + BasicBlock::iterator BBI = TI; + while (true) { CI = dyn_cast<CallInst>(BBI); if (CI && CI->getCalledFunction() == F) break; if (BBI == BB->begin()) - return false; // Didn't find a potential tail call. + return 0; // Didn't find a potential tail call. --BBI; } // If this call is marked as a tail call, and if there are dynamic allocas in // the function, we cannot perform this optimization. if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) - return false; + return 0; // As a special case, detect code like this: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call // and disable this xform in this case, because the code generator will // lower the call to fabs into inline code. if (BB == &F->getEntryBlock() && - &BB->front() == CI && &*++BB->begin() == Ret && + FirstNonDbg(BB->front()) == CI && + FirstNonDbg(llvm::next(BB->begin())) == TI && callIsSmall(F)) { // A single-block function with just a call and a return. Check that // the arguments match. 
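For reference, the rewrite that EliminateRecursiveTailCall performs once FindTRECandidate returns a call, expressed as hand-written source-level C++ rather than actual pass output (factRec and factLoop are illustrative names):

#include <cstdio>

// Before: the recursive call sits immediately before the return.
static int factRec(int N, int Acc) {
  if (N <= 1)
    return Acc;
  return factRec(N - 1, Acc * N);  // the TRE candidate
}

// After: the call becomes a branch back to a new entry block, and the
// arguments turn into the "argument PHIs" updated in place.
static int factLoop(int N, int Acc) {
  for (;;) {
    if (N <= 1)
      return Acc;
    Acc *= N;                      // PHI update for Acc
    N -= 1;                        // PHI update for N
  }
}

int main() {
  std::printf("%d %d\n", factRec(5, 1), factLoop(5, 1)); // 120 120
  return 0;
}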
@@ -367,9 +400,17 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, for (; I != E && FI != FE; ++I, ++FI) if (*I != &*FI) break; if (I == E && FI == FE) - return false; + return 0; } + return CI; +} + +bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, + BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set @@ -387,7 +428,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. - for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) { + BasicBlock::iterator BBI = CI; + for (++BBI; &*BBI != Ret; ++BBI) { if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it @@ -424,6 +466,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, return false; } + BasicBlock *BB = Ret->getParent(); + Function *F = BB->getParent(); + // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. if (OldEntry == 0) { @@ -533,3 +578,53 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, ++NumEliminated; return true; } + +bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB, + ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { + bool Change = false; + + // If the return block contains nothing but the return and PHI's, + // there might be an opportunity to duplicate the return in its + // predecessors and perform TRC there. Look for predecessors that end + // in unconditional branch and recursive call(s). 
+ SmallVector<BranchInst*, 8> UncondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *Pred = *PI; + TerminatorInst *PTI = Pred->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) + if (BI->isUnconditional()) + UncondBranchPreds.push_back(BI); + } + + while (!UncondBranchPreds.empty()) { + BranchInst *BI = UncondBranchPreds.pop_back_val(); + BasicBlock *Pred = BI->getParent(); + if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){ + DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred), + OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, + CannotTailCallElimCallsMarkedTail); + ++NumRetDuped; + Change = true; + } + } + + return Change; +} + +bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { + CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); + if (!CI) + return false; + + return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, + ArgumentPHIs, + CannotTailCallElimCallsMarkedTail); +} diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp index 4d64c85..be7bed1 100644 --- a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CallSite.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -379,27 +380,10 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { /// return false. static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI) { - std::vector<InlineAsm::ConstraintInfo> - Constraints = IA->ParseConstraints(); - - unsigned ArgNo = 0; // The argument of the CallInst. - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - TargetLowering::AsmOperandInfo OpInfo(Constraints[i]); - - // Compute the value type for each operand. - switch (OpInfo.Type) { - case InlineAsm::isOutput: - if (OpInfo.isIndirect) - OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); - break; - case InlineAsm::isInput: - OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); - break; - case InlineAsm::isClobber: - // Nothing to do. - break; - } - + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, SDValue()); @@ -584,7 +568,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, MemoryInst, Result); Matcher.IgnoreProfitability = true; bool Success = Matcher.MatchAddr(Address, 0); - Success = Success; assert(Success && "Couldn't select *anything*?"); + (void)Success; assert(Success && "Couldn't select *anything*?"); // If the match didn't cover I, then it won't be shared by it. 
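The FindTRECandidate/EliminateRecursiveTailCall split plus FoldReturnAndProcessPred means tail-recursion elimination now fires even when the recursive call reaches the return through a shared return block instead of its own ret. An editor's sketch of the kind of function the new return-duplication path makes eligible (hypothetical example, not from the patch):

    int count(int n) {
      if (n == 0)
        return 0;
      int r;
      if (n & 1)
        r = count(n - 1);  // block ends in a branch to the common return
      else
        r = count(n - 2);  // block ends in a branch to the common return
      return r;            // shared return block; once duplicated into both
    }                      // predecessors, each call becomes a TRE candidate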
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
index 4d64c85..be7bed1 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CallSite.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
@@ -379,27 +380,10 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
 /// return false.
 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                     const TargetLowering &TLI) {
-  std::vector<InlineAsm::ConstraintInfo>
-  Constraints = IA->ParseConstraints();
-
-  unsigned ArgNo = 0;   // The argument of the CallInst.
-  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
-    TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
-    // Compute the value type for each operand.
-    switch (OpInfo.Type) {
-    case InlineAsm::isOutput:
-      if (OpInfo.isIndirect)
-        OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
-      break;
-    case InlineAsm::isInput:
-      OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
-      break;
-    case InlineAsm::isClobber:
-      // Nothing to do.
-      break;
-    }
+  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
     // Compute the constraint code and ConstraintType to use.
     TLI.ComputeConstraintToUse(OpInfo, SDValue());
 
@@ -584,7 +568,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                      MemoryInst, Result);
   Matcher.IgnoreProfitability = true;
   bool Success = Matcher.MatchAddr(Address, 0);
-  Success = Success; assert(Success && "Couldn't select *anything*?");
+  (void)Success; assert(Success && "Couldn't select *anything*?");
 
   // If the match didn't cover I, then it won't be shared by it.
   if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 093083a..acaea19 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -19,8 +19,9 @@
 #include "llvm/Constant.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Scalar.h"
@@ -63,12 +64,27 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
 /// any single-entry PHI nodes in it, fold them away.  This handles the case
 /// when all entries to the PHI nodes in a block are guaranteed equal, such as
 /// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+  if (!isa<PHINode>(BB->begin())) return;
+
+  AliasAnalysis *AA = 0;
+  MemoryDependenceAnalysis *MemDep = 0;
+  if (P) {
+    AA = P->getAnalysisIfAvailable<AliasAnalysis>();
+    MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
+  }
+
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     if (PN->getIncomingValue(0) != PN)
       PN->replaceAllUsesWith(PN->getIncomingValue(0));
     else
      PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+    if (MemDep)
+      MemDep->removeInstruction(PN);  // Memdep updates AA itself.
+    else if (AA && isa<PointerType>(PN->getType()))
+      AA->deleteValue(PN);
+
     PN->eraseFromParent();
   }
 }
@@ -110,7 +126,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   if (isa<InvokeInst>(PredBB->getTerminator())) return false;
 
   succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
-  BasicBlock* OnlySucc = BB;
+  BasicBlock *OnlySucc = BB;
   for (; SI != SE; ++SI)
     if (*SI != OnlySucc) {
       OnlySucc = 0;     // There are multiple distinct successors!
@@ -131,10 +147,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   }
 
   // Begin by getting rid of unneeded PHIs.
-  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-    PN->replaceAllUsesWith(PN->getIncomingValue(0));
-    BB->getInstList().pop_front();  // Delete the phi node...
-  }
+  if (isa<PHINode>(BB->front()))
+    FoldSingleEntryPHINodes(BB, P);
 
   // Delete the unconditional branch from the predecessor...
   PredBB->getInstList().pop_back();
@@ -152,24 +166,27 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   // Finally, erase the old block and update dominator info.
   if (P) {
-    if (DominatorTree* DT = P->getAnalysisIfAvailable<DominatorTree>()) {
-      DomTreeNode* DTN = DT->getNode(BB);
-      DomTreeNode* PredDTN = DT->getNode(PredBB);
-
-      if (DTN) {
-        SmallPtrSet<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
-        for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = Children.begin(),
+    if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+      if (DomTreeNode *DTN = DT->getNode(BB)) {
+        DomTreeNode *PredDTN = DT->getNode(PredBB);
+        SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+        for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
              DE = Children.end(); DI != DE; ++DI)
           DT->changeImmediateDominator(*DI, PredDTN);
 
        DT->eraseNode(BB);
      }
+
+      if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+        LI->removeBlock(BB);
+
+      if (MemoryDependenceAnalysis *MD =
+            P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
+        MD->invalidateCachedPredecessors();
    }
  }
 
  BB->eraseFromParent();
-
-
  return true;
 }
@@ -218,52 +235,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
   ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
 }
 
-/// RemoveSuccessor - Change the specified terminator instruction such that its
-/// successor SuccNum no longer exists.  Because this reduces the outgoing
-/// degree of the current basic block, the actual terminator instruction itself
-/// may have to be changed.  In the case where the last successor of the block
-/// is deleted, a return instruction is inserted in its place which can cause a
-/// surprising change in program behavior if it is not expected.
-///
-void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
-  assert(SuccNum < TI->getNumSuccessors() &&
-         "Trying to remove a nonexistant successor!");
-
-  // If our old successor block contains any PHI nodes, remove the entry in the
-  // PHI nodes that comes from this branch...
-  //
-  BasicBlock *BB = TI->getParent();
-  TI->getSuccessor(SuccNum)->removePredecessor(BB);
-
-  TerminatorInst *NewTI = 0;
-  switch (TI->getOpcode()) {
-  case Instruction::Br:
-    // If this is a conditional branch... convert to unconditional branch.
-    if (TI->getNumSuccessors() == 2) {
-      cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum));
-    } else {                    // Otherwise convert to a return instruction...
-      Value *RetVal = 0;
-
-      // Create a value to return... if the function doesn't return null...
-      if (!BB->getParent()->getReturnType()->isVoidTy())
-        RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
-
-      // Create the return...
-      NewTI = ReturnInst::Create(TI->getContext(), RetVal);
-    }
-    break;
-
-  case Instruction::Invoke:    // Should convert to call
-  case Instruction::Switch:    // Should remove entry
-  default:
-  case Instruction::Ret:       // Cannot happen, has no successors!
-    llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
-  }
-
-  if (NewTI)   // If it's a different instruction, replace.
-    ReplaceInstWithInst(TI, NewTI);
-}
-
 /// GetSuccessorNumber - Search for the specified successor of basic block BB
 /// and return its position in the terminator instruction's list of
 /// successors.  It is an error to call this with a block that is not a
@@ -300,13 +271,13 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
     assert(SP == BB && "CFG broken");
     SP = NULL;
     return SplitBlock(Succ, Succ->begin(), P);
-  } else {
-    // Otherwise, if BB has a single successor, split it at the bottom of the
-    // block.
-    assert(BB->getTerminator()->getNumSuccessors() == 1 &&
-           "Should have a single succ!");
-    return SplitBlock(BB, BB->getTerminator(), P);
   }
+
+  // Otherwise, if BB has a single successor, split it at the bottom of the
+  // block.
+  assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+         "Should have a single succ!");
+  return SplitBlock(BB, BB->getTerminator(), P);
 }
 
 /// SplitBlock - Split the specified block at the specified instruction - every
@@ -322,12 +293,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
 
   // The new block lives in whichever loop the old one did. This preserves
   // LCSSA as well, because we force the split point to be after any PHI nodes.
-  if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
+  if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
     if (Loop *L = LI->getLoopFor(Old))
       L->addBasicBlockToLoop(New, LI->getBase());
 
   if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
-    // Old dominates New. New node domiantes all other nodes dominated by Old.
+    // Old dominates New. New node dominates all other nodes dominated by Old.
     DomTreeNode *OldNode = DT->getNode(Old);
     std::vector<DomTreeNode *> Children;
     for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
@@ -340,9 +311,6 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
       DT->changeImmediateDominator(*I, NewNode);
   }
 
-  if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>())
-    DF->splitBlock(Old);
-
   return New;
 }
 
@@ -354,10 +322,9 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
 /// suffix of 'Suffix'.
 ///
 /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
-/// In particular, it does not preserve LoopSimplify (because it's
-/// complicated to handle the case where one of the edges being split
-/// is an exit of a loop with other exits).
+/// LoopInfo, and LCCSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
 ///
 BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
                                          BasicBlock *const *Preds,
@@ -407,13 +374,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
     }
   }
 
-  // Update dominator tree and dominator frontier if available.
+  // Update dominator tree if available.
   DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
   if (DT)
     DT->splitBlock(NewBB);
-  if (DominanceFrontier *DF =
-        P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
-    DF->splitBlock(NewBB);
 
   // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
   // node becomes an incoming value for BB's phi node. However, if the Preds
@@ -545,7 +509,32 @@ void llvm::FindFunctionBackedges(const Function &F,
       // Go up one level.
       InStack.erase(VisitStack.pop_back_val().first);
     }
-  } while (!VisitStack.empty());
-
-
+  } while (!VisitStack.empty());
+}
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+                                             BasicBlock *Pred) {
+  Instruction *UncondBranch = Pred->getTerminator();
+  // Clone the return and add it to the end of the predecessor.
+  Instruction *NewRet = RI->clone();
+  Pred->getInstList().push_back(NewRet);
+
+  // If the return instruction returns a value, and if the value was a
+  // PHI node in "BB", propagate the right value into the return.
+  for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+       i != e; ++i)
+    if (PHINode *PN = dyn_cast<PHINode>(*i))
+      if (PN->getParent() == BB)
+        *i = PN->getIncomingValueForBlock(Pred);
+
+  // Update any PHI nodes in the returning block to realize that we no
+  // longer branch to them.
+  BB->removePredecessor(Pred);
+  UncondBranch->eraseFromParent();
+  return cast<ReturnInst>(NewRet);
+}
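FoldReturnIntoUncondBranch is the utility the tail-call changes above rely on. In source-level terms the rewrite looks roughly like this (illustrative C++, not from the patch):

    // Before: both predecessors branch to one return block whose PHI picks r.
    int before(int c, int a, int b) {
      int r;
      if (c) r = a;  // pred 1: unconditional branch to the return block
      else   r = b;  // pred 2: unconditional branch to the return block
      return r;      // return block: returns a PHI of (a, b)
    }
    // After: the return is duplicated into each predecessor, and the PHI
    // operand incoming from that predecessor is substituted directly.
    int after(int c, int a, int b) {
      if (c) return a;
      return b;
    }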
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index f75ffe6..616b066 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -11,8 +11,7 @@
 // inserting a dummy basic block.  This pass may be "required" by passes that
 // cannot deal with critical edges.  For this usage, the structure type is
 // forward declared.  This pass obviously invalidates the CFG, but can update
-// forward dominator (set, immediate dominators, tree, and frontier)
-// information.
+// dominator trees.
 //
 //===----------------------------------------------------------------------===//
 
@@ -36,13 +35,14 @@ STATISTIC(NumBroken, "Number of blocks inserted");
 
 namespace {
   struct BreakCriticalEdges : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    BreakCriticalEdges() : FunctionPass(ID) {}
+    BreakCriticalEdges() : FunctionPass(ID) {
+      initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
       AU.addPreserved<LoopInfo>();
       AU.addPreserved<ProfileInfo>();
 
@@ -54,7 +54,7 @@ namespace {
 
 char BreakCriticalEdges::ID = 0;
 INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
-                "Break critical edges in CFG", false, false);
+                "Break critical edges in CFG", false, false)
 
 // Publically exposed interface to pass...
 char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
@@ -150,10 +150,9 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
 }
 
 /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
-/// split the critical edge.  This will update DominatorTree and
-/// DominatorFrontier information if it is available, thus calling this pass
-/// will not invalidate either of them. This returns the new block if the edge
-/// was split, null otherwise.
+/// split the critical edge.  This will update DominatorTree information if it
+/// is available, thus calling this pass will not invalidate either of them.
+/// This returns the new block if the edge was split, null otherwise.
 ///
 /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
 /// specified successor will be merged into the same critical edge block.
@@ -255,12 +254,11 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
   if (P == 0) return NewBB;
 
   DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
-  DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>();
   LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
   ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
 
   // If we have nothing to update, just return.
-  if (DT == 0 && DF == 0 && LI == 0 && PI == 0)
+  if (DT == 0 && LI == 0 && PI == 0)
     return NewBB;
 
   // Now update analysis information.  Since the only predecessor of NewBB is
@@ -281,7 +279,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
          I != E; ++I) {
       BasicBlock *P = *I;
       if (P != NewBB)
-        OtherPreds.push_back(P);
+        OtherPreds.push_back(P);
     }
   }
 
@@ -318,40 +316,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
     }
   }
 
-  // Should we update DominanceFrontier information?
-  if (DF) {
-    // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
-    if (!OtherPreds.empty()) {
-      // FIXME: IMPLEMENT THIS!
-      llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
-                       " not implemented yet!");
-    }
-
-    // Since the new block is dominated by its only predecessor TIBB,
-    // it cannot be in any block's dominance frontier.  If NewBB dominates
-    // DestBB, its dominance frontier is the same as DestBB's, otherwise it is
-    // just {DestBB}.
-    DominanceFrontier::DomSetType NewDFSet;
-    if (NewBBDominatesDestBB) {
-      DominanceFrontier::iterator I = DF->find(DestBB);
-      if (I != DF->end()) {
-        DF->addBasicBlock(NewBB, I->second);
-
-        if (I->second.count(DestBB)) {
-          // However NewBB's frontier does not include DestBB.
-          DominanceFrontier::iterator NF = DF->find(NewBB);
-          DF->removeFromFrontier(NF, DestBB);
-        }
-      }
-      else
-        DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType());
-    } else {
-      DominanceFrontier::DomSetType NewDFSet;
-      NewDFSet.insert(DestBB);
-      DF->addBasicBlock(NewBB, NewDFSet);
-    }
-  }
-
   // Update LoopInfo if it is around.
   if (LI) {
     if (Loop *TIL = LI->getLoopFor(TIBB)) {
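For context on the pass above: an edge is critical when its source block has several successors and its destination has several predecessors, so no code can be placed "on" the edge until a block is inserted there. A toy model of the test (editor's sketch, not the LLVM API):

    #include <vector>
    typedef std::vector<std::vector<int> > Graph;  // adjacency lists

    // An edge U->V is critical iff U has more than one successor and V has
    // more than one predecessor; SplitCriticalEdge inserts a new block on it.
    bool isCriticalEdge(const Graph &Succ, const Graph &Pred, int U, int V) {
      return Succ[U].size() > 1 && Pred[V].size() > 1;
    }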
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index c313949..4a90751 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -131,21 +131,6 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
   return CI;
 }
 
-
-/// EmitMemCpy - Emit a call to the memcpy function to the builder.  This always
-/// expects that Len has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
-                        bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  Dst = CastToCStr(Dst, B);
-  Src = CastToCStr(Src, B);
-  const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
-  Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
-  return B.CreateCall5(MemCpy, Dst, Src, Len,
-                       ConstantInt::get(B.getInt32Ty(), Align),
-                       ConstantInt::get(B.getInt1Ty(), isVolatile));
-}
-
 /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
 /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
 /// are pointers.
@@ -170,22 +155,6 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
   return CI;
 }
 
-/// EmitMemMove - Emit a call to the memmove function to the builder.  This
-/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
-                         bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  LLVMContext &Context = B.GetInsertBlock()->getContext();
-  const Type *ArgTys[3] = { Dst->getType(), Src->getType(),
-                            TD->getIntPtrType(Context) };
-  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3);
-  Dst = CastToCStr(Dst, B);
-  Src = CastToCStr(Src, B);
-  Value *A = ConstantInt::get(B.getInt32Ty(), Align);
-  Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
-  return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol);
-}
-
 /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
 /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
 Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
@@ -233,18 +202,6 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
   return CI;
 }
 
-/// EmitMemSet - Emit a call to the memset function
-Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
-                        IRBuilder<> &B, const TargetData *TD) {
-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  Intrinsic::ID IID = Intrinsic::memset;
-  const Type *Tys[2] = { Dst->getType(), Len->getType() };
-  Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2);
-  Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
-  Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
-  return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol);
-}
-
 /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
 /// 'floor').  This function is known to take a single of type matching 'Op' and
 /// returns one value with the same type.  If 'Op' is a long double, 'l' is
@@ -422,8 +379,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
       return false;
 
     if (isFoldable(3, 2, false)) {
-      EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                 CI->getArgOperand(2), 1, false, B, TD);
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                     CI->getArgOperand(2), 1);
       replaceCall(CI->getArgOperand(0));
      return true;
    }
@@ -445,8 +402,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
       return false;
 
     if (isFoldable(3, 2, false)) {
-      EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
-                  CI->getArgOperand(2), 1, false, B, TD);
+      B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+                      CI->getArgOperand(2), 1);
       replaceCall(CI->getArgOperand(0));
      return true;
    }
@@ -465,8 +422,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
 
     if (isFoldable(3, 2, false)) {
       Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
                                    false);
-      EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2),
-                 false, B, TD);
+      B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
       replaceCall(CI->getArgOperand(0));
      return true;
    }
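The EmitMemCpy/EmitMemMove/EmitMemSet helpers are deleted above because IRBuilder provides equivalent factory methods, as the fold() hunks show. Collected in one place (sketch only; B is an IRBuilder<> positioned at the call being rewritten, and Dst/Src/Val/Len stand for the operands used above):

    B.CreateMemCpy(Dst, Src, Len, /*Align=*/1);   // replaces EmitMemCpy(..., B, TD)
    B.CreateMemMove(Dst, Src, Len, /*Align=*/1);  // replaces EmitMemMove(..., B, TD)
    B.CreateMemSet(Dst, Val, Len, /*Align=*/1);   // replaces EmitMemSet(..., B, TD)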
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index f43186e..d967ceb 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -112,8 +112,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
     const BasicBlock &BB = *BI;
 
     // Create a new basic block and copy instructions into it!
-    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc,
-                                      CodeInfo);
+    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
     VMap[&BB] = CBB;                       // Add basic block mapping.
 
     if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
@@ -122,12 +121,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 
   // Loop over all of the instructions in the function, fixing up operand
   // references as we go.  This uses VMap to do all the hard work.
-  //
   for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
          BE = NewFunc->end(); BB != BE; ++BB)
     // Loop over all instructions, fixing each one as we find it...
     for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
-      RemapInstruction(II, VMap, ModuleLevelChanges);
+      RemapInstruction(II, VMap,
+                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
 }
 
 /// CloneFunction - Return a copy of the specified function, but without
@@ -138,8 +137,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 /// updated to include mappings from all of the instructions and basicblocks in
 /// the function from their old to new values.
 ///
-Function *llvm::CloneFunction(const Function *F,
-                              ValueToValueMapTy &VMap,
+Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
                               bool ModuleLevelChanges,
                               ClonedCodeInfo *CodeInfo) {
   std::vector<const Type*> ArgTypes;
@@ -216,7 +214,7 @@ namespace {
 /// anything that it can reach.
 void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
                                        std::vector<const BasicBlock*> &ToClone){
-  Value *&BBEntry = VMap[BB];
+  TrackingVH<Value> &BBEntry = VMap[BB];
 
   // Have we already cloned this block?
   if (BBEntry) return;
@@ -262,8 +260,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
       // If the condition was a known constant in the callee...
       ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
       // Or is a known constant in the caller...
-      if (Cond == 0)
-        Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
+      if (Cond == 0) {
+        Value *V = VMap[BI->getCondition()];
+        Cond = dyn_cast_or_null<ConstantInt>(V);
+      }
 
       // Constant fold to uncond branch!
       if (Cond) {
@@ -276,8 +276,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
     } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
       // If switching on a value known constant in the caller.
       ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
-      if (Cond == 0)  // Or known constant after constant prop in the callee...
-        Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
+      if (Cond == 0) { // Or known constant after constant prop in the callee...
+        Value *V = VMap[SI->getCondition()];
+        Cond = dyn_cast_or_null<ConstantInt>(V);
+      }
       if (Cond) {     // Constant fold to uncond branch!
         BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
         VMap[OldTI] = BranchInst::Create(Dest, NewBB);
@@ -318,7 +320,8 @@ ConstantFoldMappedInstruction(const Instruction *I) {
   SmallVector<Constant*, 8> Ops;
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                           VMap, ModuleLevelChanges)))
+                                 VMap,
+                                 ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
       Ops.push_back(Op);
     else
       return 0;  // All operands not constant!
@@ -394,7 +397,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
   SmallVector<const PHINode*, 16> PHIToResolve;
   for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
        BI != BE; ++BI) {
-    BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
+    Value *V = VMap[BI];
+    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
     if (NewBB == 0) continue;  // Dead block.
 
     // Add the new block to the new function.
@@ -455,7 +459,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
           I->setDebugLoc(DebugLoc());
         }
       }
-      RemapInstruction(I, VMap, ModuleLevelChanges);
+      RemapInstruction(I, VMap,
+                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
     }
   }
 
@@ -474,10 +479,11 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     OPN = PHIToResolve[phino];
     PHINode *PN = cast<PHINode>(VMap[OPN]);
     for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
-      if (BasicBlock *MappedBlock =
-          cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
+      Value *V = VMap[PN->getIncomingBlock(pred)];
+      if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
         Value *InVal = MapValue(PN->getIncomingValue(pred),
-                                VMap, ModuleLevelChanges);
+                                VMap,
+                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
         assert(InVal && "Unknown input value?");
         PN->setIncomingValue(pred, InVal);
         PN->setIncomingBlock(pred, MappedBlock);
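All of the cloning utilities in this file funnel through a single old-value-to-new-value map (VMap); RemapInstruction then rewrites each cloned instruction's operands through that map. A toy analogue of the mechanism (editor's sketch, not the LLVM API):

    #include <map>
    struct Node { Node *Op; };  // stand-in for an instruction with one operand

    // Clone N; if N's operand already has a clone, point the copy at it,
    // otherwise the copy keeps referring to the original for a later fix-up.
    Node *cloneWithMap(Node *N, std::map<Node*, Node*> &VMap) {
      Node *C = new Node(*N);                          // shallow copy
      std::map<Node*, Node*>::iterator It = VMap.find(N->Op);
      if (It != VMap.end())
        C->Op = It->second;                            // remap the operand
      VMap[N] = C;                                     // record for later users
      return C;
    }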
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
index 551b630..87dd141 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
@@ -19,15 +19,14 @@
 
 using namespace llvm;
 
-/// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
-/// dominance info. It is expected that basic block is already cloned.
+/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
+/// that the basic block is already cloned.
 static void CloneDominatorInfo(BasicBlock *BB,
-                               ValueMap<const Value *, Value *> &VMap,
-                               DominatorTree *DT,
-                               DominanceFrontier *DF) {
+                               ValueToValueMapTy &VMap,
+                               DominatorTree *DT) {
 
   assert (DT && "DominatorTree is not available");
-  ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+  ValueToValueMapTy::iterator BI = VMap.find(BB);
   assert (BI != VMap.end() && "BasicBlock clone is missing");
   BasicBlock *NewBB = cast<BasicBlock>(BI->second);
 
@@ -42,45 +41,23 @@ static void CloneDominatorInfo(BasicBlock *BB,
 
   // NewBB's dominator is either BB's dominator or BB's dominator's clone.
   BasicBlock *NewBBDom = BBDom;
-  ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+  ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
   if (BBDomI != VMap.end()) {
     NewBBDom = cast<BasicBlock>(BBDomI->second);
     if (!DT->getNode(NewBBDom))
-      CloneDominatorInfo(BBDom, VMap, DT, DF);
+      CloneDominatorInfo(BBDom, VMap, DT);
   }
   DT->addNewBlock(NewBB, NewBBDom);
-
-  // Copy cloned dominance frontiner set
-  if (DF) {
-    DominanceFrontier::DomSetType NewDFSet;
-    DominanceFrontier::iterator DFI = DF->find(BB);
-    if ( DFI != DF->end()) {
-      DominanceFrontier::DomSetType S = DFI->second;
-      for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
-           I != E; ++I) {
-        BasicBlock *DB = *I;
-        ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
-        if (IDM != VMap.end())
-          NewDFSet.insert(cast<BasicBlock>(IDM->second));
-        else
-          NewDFSet.insert(DB);
-      }
-    }
-    DF->addBasicBlock(NewBB, NewDFSet);
-  }
 }
 
 /// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
 /// using old blocks to new blocks mapping.
 Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
-                      ValueMap<const Value *, Value *> &VMap, Pass *P) {
+                      ValueToValueMapTy &VMap, Pass *P) {
 
   DominatorTree *DT = NULL;
-  DominanceFrontier *DF = NULL;
-  if (P) {
+  if (P)
     DT = P->getAnalysisIfAvailable<DominatorTree>();
-    DF = P->getAnalysisIfAvailable<DominanceFrontier>();
-  }
 
   SmallVector<BasicBlock *, 16> NewBlocks;
@@ -116,7 +93,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
        I != E; ++I) {
     BasicBlock *BB = *I;
-    CloneDominatorInfo(BB, VMap, DT, DF);
+    CloneDominatorInfo(BB, VMap, DT);
   }
 
   // Process sub loops
@@ -134,7 +111,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
     for (unsigned index = 0, num_ops = Insn->getNumOperands();
          index != num_ops; ++index) {
       Value *Op = Insn->getOperand(index);
-      ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+      ValueToValueMapTy::iterator OpItr = VMap.find(Op);
       if (OpItr != VMap.end())
         Insn->setOperand(index, OpItr->second);
     }
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index b347bf5..1046c38 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -89,8 +89,7 @@ Module *llvm::CloneModule(const Module *M,
     GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
     if (I->hasInitializer())
       GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 VMap,
-                                                 true)));
+                                                 VMap, RF_None)));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -121,7 +120,7 @@ Module *llvm::CloneModule(const Module *M,
     GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
     GA->setLinkage(I->getLinkage());
     if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
+      GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
   }
 
   // And named metadata....
@@ -130,7 +129,8 @@ Module *llvm::CloneModule(const Module *M,
     const NamedMDNode &NMD = *I;
     NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
     for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
-      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
+      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
+                                               RF_None)));
   }
 
   return New;
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index b51f751..e633772 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -186,8 +186,8 @@ void CodeExtractor::splitReturnBlocks() {
     if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
       BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
       if (DT) {
-        // Old dominates New. New node domiantes all other nodes dominated
-        //by Old.
+        // Old dominates New. New node dominates all other nodes dominated
+        // by Old.
         DomTreeNode *OldNode = DT->getNode(*I);
         SmallVector<DomTreeNode*, 8> Children;
         for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 8e82a02..8cc2649 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -129,7 +129,7 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
   for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
     if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
       assert(II->getParent() != P->getIncomingBlock(i) &&
-             "Invoke edge not supported yet"); II=II;
+             "Invoke edge not supported yet"); (void)II;
     }
     new StoreInst(P->getIncomingValue(i), Slot,
                   P->getIncomingBlock(i)->getTerminator());
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 88979e86..c1faf24 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -22,7 +22,9 @@
 #include "llvm/Attributes.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CallSite.h"
@@ -170,7 +172,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 /// some edges of the callgraph may remain.
 static void UpdateCallGraphAfterInlining(CallSite CS,
                                          Function::iterator FirstNewBlock,
-                                         ValueMap<const Value*, Value*> &VMap,
+                                         ValueToValueMapTy &VMap,
                                          InlineFunctionInfo &IFI) {
   CallGraph &CG = *IFI.CG;
   const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -193,7 +195,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
 
   for (; I != E; ++I) {
     const Value *OrigCall = I->first;
 
-    ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
+    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
     // Only copy the edge if the call was inlined!
     if (VMI == VMap.end() || VMI->second == 0)
       continue;
@@ -228,6 +230,90 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   CallerNode->removeCallEdgeFor(CS);
 }
 
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+                                  const Function *CalledFunc,
+                                  InlineFunctionInfo &IFI,
+                                  unsigned ByValAlignment) {
+  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+  // If the called function is readonly, then it could not mutate the caller's
+  // copy of the byval'd memory.  In this case, it is safe to elide the copy and
+  // temporary.
+  if (CalledFunc->onlyReadsMemory()) {
+    // If the byval argument has a specified alignment that is greater than the
+    // passed in pointer, then we either have to round up the input pointer or
+    // give up on this transformation.
+    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
+      return Arg;
+
+    // If the pointer is already known to be sufficiently aligned, or if we can
+    // round it up to a larger alignment, then we don't need a temporary.
+    if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+                                   IFI.TD) >= ByValAlignment)
+      return Arg;
+
+    // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
+    // for code quality, but rarely happens and is required for correctness.
+  }
+
+  LLVMContext &Context = Arg->getContext();
+
+  const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+
+  // Create the alloca.  If we have TargetData, use nice alignment.
+  unsigned Align = 1;
+  if (IFI.TD)
+    Align = IFI.TD->getPrefTypeAlignment(AggTy);
+
+  // If the byval had an alignment specified, we *must* use at least that
+  // alignment, as it is required by the byval argument (and uses of the
+  // pointer inside the callee).
+  Align = std::max(Align, ByValAlignment);
+
+  Function *Caller = TheCall->getParent()->getParent();
+
+  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+                                    &*Caller->begin()->begin());
+  // Emit a memcpy.
+  const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+  Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+                                                 Intrinsic::memcpy,
+                                                 Tys, 3);
+  Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+
+  Value *Size;
+  if (IFI.TD == 0)
+    Size = ConstantExpr::getSizeOf(AggTy);
+  else
+    Size = ConstantInt::get(Type::getInt64Ty(Context),
+                            IFI.TD->getTypeStoreSize(AggTy));
+
+  // Always generate a memcpy of alignment 1 here because we don't know
+  // the alignment of the src pointer.  Other optimizations can infer
+  // better alignment.
+  Value *CallArgs[] = {
+    DestCast, SrcCast, Size,
+    ConstantInt::get(Type::getInt32Ty(Context), 1),
+    ConstantInt::getFalse(Context) // isVolatile
+  };
+  CallInst *TheMemCpy =
+    CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+
+  // If we have a call graph, update it.
+  if (CallGraph *CG = IFI.CG) {
+    CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+    CallGraphNode *CallerNode = (*CG)[Caller];
+    CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+  }
+
+  // Uses of the argument in the function should use our new alloca
+  // instead.
+  return NewAlloca;
+}
+
 // InlineFunction - This function inlines the called function into the basic
 // block of the caller.  This returns false if it is not possible to inline this
 // call.  The program is still in a well defined state if this occurs though.
@@ -251,7 +337,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       CalledFunc->isDeclaration() ||      // call, or call to a vararg function!
       CalledFunc->getFunctionType()->isVarArg()) return false;
 
-
   // If the call to the callee is not a tail call, we must clear the 'tail'
   // flags on any calls that we inline.
   bool MustClearTailCallFlags =
@@ -287,7 +372,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   Function::iterator FirstNewBlock;
 
   { // Scope to destroy VMap after cloning.
-    ValueMap<const Value*, Value*> VMap;
+    ValueToValueMapTy VMap;
 
     assert(CalledFunc->arg_size() == CS.arg_size() &&
            "No varargs calls can be inlined!");
@@ -304,58 +389,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       // by them explicit.  However, we don't do this if the callee is readonly
       // or readnone, because the copy would be unneeded: the callee doesn't
       // modify the struct.
-      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
-          !CalledFunc->onlyReadsMemory()) {
-        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
-        const Type *VoidPtrTy =
-            Type::getInt8PtrTy(Context);
-
-        // Create the alloca.  If we have TargetData, use nice alignment.
-        unsigned Align = 1;
-        if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy);
-        Value *NewAlloca = new AllocaInst(AggTy, 0, Align,
-                                          I->getName(),
-                                          &*Caller->begin()->begin());
-        // Emit a memcpy.
-        const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
-        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
-                                                       Intrinsic::memcpy,
-                                                       Tys, 3);
-        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
-        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
-
-        Value *Size;
-        if (IFI.TD == 0)
-          Size = ConstantExpr::getSizeOf(AggTy);
-        else
-          Size = ConstantInt::get(Type::getInt64Ty(Context),
-                                  IFI.TD->getTypeStoreSize(AggTy));
-
-        // Always generate a memcpy of alignment 1 here because we don't know
-        // the alignment of the src pointer.  Other optimizations can infer
-        // better alignment.
-        Value *CallArgs[] = {
-          DestCast, SrcCast, Size,
-          ConstantInt::get(Type::getInt32Ty(Context), 1),
-          ConstantInt::get(Type::getInt1Ty(Context), 0)
-        };
-        CallInst *TheMemCpy =
-          CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
-
-        // If we have a call graph, update it.
-        if (CallGraph *CG = IFI.CG) {
-          CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
-          CallGraphNode *CallerNode = (*CG)[Caller];
-          CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
-        }
-
-        // Uses of the argument in the function should use our new alloca
-        // instead.
-        ActualArg = NewAlloca;
-
+      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+                                        CalledFunc->getParamAlignment(ArgNo+1));
+
         // Calls that we inline may use the new alloca, so we need to clear
-        // their 'tail' flags.
-        MustClearTailCallFlags = true;
+        // their 'tail' flags if HandleByValArgument introduced a new alloca and
+        // the callee has calls.
+        MustClearTailCallFlags |= ActualArg != *AI;
       }
 
       VMap[I] = ActualArg;
@@ -399,8 +440,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       if (!isa<Constant>(AI->getArraySize()))
         continue;
 
-      // Keep track of the static allocas that we inline into the caller if the
-      // StaticAllocas pointer is non-null.
+      // Keep track of the static allocas that we inline into the caller.
       IFI.StaticAllocas.push_back(AI);
 
       // Scan for the block of allocas that we can move over, and move them
@@ -579,10 +619,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   // any users of the original call/invoke instruction.
   const Type *RTy = CalledFunc->getReturnType();
 
+  PHINode *PHI = 0;
   if (Returns.size() > 1) {
     // The PHI node should go at the front of the new basic block to merge all
     // possible incoming values.
-    PHINode *PHI = 0;
     if (!TheCall->use_empty()) {
       PHI = PHINode::Create(RTy, TheCall->getName(),
                             AfterCallBB->begin());
@@ -600,14 +640,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
-
-      // Now that we inserted the PHI, check to see if it has a single value
-      // (e.g. all the entries are the same or undef).  If so, remove the PHI so
-      // it doesn't block other optimizations.
-      if (Value *V = PHI->hasConstantValue()) {
-        PHI->replaceAllUsesWith(V);
-        PHI->eraseFromParent();
-      }
    }
 
@@ -664,5 +696,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   // Now we can remove the CalleeEntry block, which is now empty.
   Caller->getBasicBlockList().erase(CalleeEntry);
 
+  // If we inserted a phi node, check to see if it has a single value (e.g. all
+  // the entries are the same or undef).  If so, remove the PHI so it doesn't
+  // block other optimizations.
+  if (PHI)
+    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+      PHI->replaceAllUsesWith(V);
+      PHI->eraseFromParent();
+    }
+
   return true;
 }
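The heart of the new HandleByValArgument above is a single decision: keep the caller's pointer, or materialize an aligned temporary plus an explicit memcpy. Condensed into a predicate (editor's sketch mirroring the logic above; Known stands for what getOrEnforceKnownAlignment can prove or force):

    // Returns true when inlining must materialize a copy of the byval memory.
    bool needsByValCopy(bool CalleeOnlyReadsMemory, unsigned ByValAlignment,
                        unsigned Known) {
      if (!CalleeOnlyReadsMemory)
        return true;                 // callee may write its conceptual copy
      if (ByValAlignment <= 1)
        return false;                // 0/1 = no alignment requirement to meet
      return Known < ByValAlignment; // copy only if alignment can't be proven
    }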
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 5ca8299..45c15de 100644
--- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -23,7 +23,9 @@ using namespace llvm;
 namespace {
   struct InstNamer : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    InstNamer() : FunctionPass(ID) {}
+    InstNamer() : FunctionPass(ID) {
+      initializeInstNamerPass(*PassRegistry::getPassRegistry());
+    }
 
     void getAnalysisUsage(AnalysisUsage &Info) const {
       Info.setPreservesAll();
@@ -48,11 +50,10 @@ namespace {
   };
 
   char InstNamer::ID = 0;
-  INITIALIZE_PASS(InstNamer, "instnamer",
-                  "Assign names to anonymous instructions", false, false);
 }
 
-
+INITIALIZE_PASS(InstNamer, "instnamer",
+                "Assign names to anonymous instructions", false, false)
 char &llvm::InstructionNamerID = InstNamer::ID;
 //===----------------------------------------------------------------------===//
 //
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 275b265..b2e5fa6 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -47,7 +47,9 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables");
 namespace {
   struct LCSSA : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LCSSA() : LoopPass(ID) {}
+    LCSSA() : LoopPass(ID) {
+      initializeLCSSAPass(*PassRegistry::getPassRegistry());
+    }
 
     // Cached analysis information for the current function.
     DominatorTree *DT;
@@ -65,10 +67,7 @@ namespace {
       AU.setPreservesCFG();
 
       AU.addRequired<DominatorTree>();
-      AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
       AU.addRequired<LoopInfo>();
-      AU.addPreserved<LoopInfo>();
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreserved<ScalarEvolution>();
     }
@@ -90,7 +89,10 @@ namespace {
 }
 
 char LCSSA::ID = 0;
-INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false);
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
 
 Pass *llvm::createLCSSAPass() { return new LCSSA(); }
 char &llvm::LCSSAID = LCSSA::ID;
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 52f0499..063c76e 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -22,9 +22,11 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
@@ -66,9 +68,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
       assert(BI->getParent() && "Terminator not inserted in block!");
       OldDest->removePredecessor(BI->getParent());
 
-      // Set the unconditional destination, and change the insn to be an
-      // unconditional branch.
-      BI->setUnconditionalDest(Destination);
+      // Replace the conditional branch with an unconditional one.
+      BranchInst::Create(Destination, BI);
+      BI->eraseFromParent();
       return true;
     }
 
@@ -81,8 +83,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
       assert(BI->getParent() && "Terminator not inserted in block!");
       Dest1->removePredecessor(BI->getParent());
 
-      // Change a conditional branch to unconditional.
-      BI->setUnconditionalDest(Dest1);
+      // Replace the conditional branch with an unconditional one.
+      BranchInst::Create(Dest1, BI);
+      BI->eraseFromParent();
       return true;
     }
     return false;
@@ -209,9 +212,6 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
   // We don't want debug info removed by anything this general.
   if (isa<DbgInfoIntrinsic>(I)) return false;
 
-  // Likewise for memory use markers.
-  if (isa<MemoryUseIntrinsic>(I)) return false;
-
   if (!I->mayHaveSideEffects()) return true;
 
   // Special case intrinsics that "may have side effects" but can be deleted
@@ -260,29 +260,45 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
   return true;
 }
 
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are multiple uses that all refer to the same value.
+static bool areAllUsesEqual(Instruction *I) {
+  Value::use_iterator UI = I->use_begin();
+  Value::use_iterator UE = I->use_end();
+  if (UI == UE)
+    return false;
+
+  User *TheUse = *UI;
+  for (++UI; UI != UE; ++UI) {
+    if (*UI != TheUse)
+      return false;
+  }
+  return true;
+}
+
 /// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
 /// dead PHI node, due to being a def-use chain of single-use nodes that
 /// either forms a cycle or is terminated by a trivially dead instruction,
 /// delete it.  If that makes any of its operands trivially dead, delete them
 /// too, recursively.  Return true if the PHI node is actually deleted.
-bool
-llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
   // We can remove a PHI if it is on a cycle in the def-use graph
   // where each node in the cycle has degree one, i.e. only one use,
   // and is an instruction with no side effects.
-  if (!PN->hasOneUse())
+  if (!areAllUsesEqual(PN))
     return false;
 
   bool Changed = false;
   SmallPtrSet<PHINode *, 4> PHIs;
   PHIs.insert(PN);
   for (Instruction *J = cast<Instruction>(*PN->use_begin());
-       J->hasOneUse() && !J->mayHaveSideEffects();
+       areAllUsesEqual(J) && !J->mayHaveSideEffects();
        J = cast<Instruction>(*J->use_begin()))
     // If we find a PHI more than once, we're on a cycle that
     // won't prove fruitful.
     if (PHINode *JP = dyn_cast<PHINode>(J))
-      if (!PHIs.insert(cast<PHINode>(JP))) {
+      if (!PHIs.insert(JP)) {
         // Break the cycle and delete the PHI and its operands.
         JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
         (void)RecursivelyDeleteTriviallyDeadInstructions(JP);
@@ -346,13 +362,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
   WeakVH PhiIt = &BB->front();
   while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
     PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
-
-    Value *PNV = PN->hasConstantValue();
+
+    Value *PNV = SimplifyInstruction(PN, TD);
     if (PNV == 0) continue;
-
+
     // If we're able to simplify the phi to a single value, substitute the new
     // value into all of its uses.
-    assert(PNV != PN && "hasConstantValue broken");
+    assert(PNV != PN && "SimplifyInstruction broken!");
     Value *OldPhiIt = PhiIt;
     ReplaceAndSimplifyAllUses(PN, PNV, TD);
 
@@ -402,6 +418,12 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   PredBB->replaceAllUsesWith(DestBB);
 
   if (P) {
+    DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+    if (DT) {
+      BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+      DT->changeImmediateDominator(DestBB, PredBBIDom);
+      DT->eraseNode(PredBB);
+    }
     ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
     if (PI) {
       PI->replaceAllUses(PredBB, DestBB);
@@ -645,3 +667,95 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
 
   return Changed;
 }
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+                                      unsigned PrefAlign) {
+
+  User *U = dyn_cast<User>(V);
+  if (!U) return Align;
+
+  switch (Operator::getOpcode(U)) {
+  default: break;
+  case Instruction::BitCast:
+    return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+  case Instruction::GetElementPtr: {
+    // If all indexes are zero, it is just the alignment of the base pointer.
+    bool AllZeroOperands = true;
+    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+      if (!isa<Constant>(*i) ||
+          !cast<Constant>(*i)->isNullValue()) {
+        AllZeroOperands = false;
+        break;
+      }
+
+    if (AllZeroOperands) {
+      // Treat this like a bitcast.
+      return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+    }
+    return Align;
+  }
+  case Instruction::Alloca: {
+    AllocaInst *AI = cast<AllocaInst>(V);
+    // If there is a requested alignment and if this is an alloca, round up.
+    if (AI->getAlignment() >= PrefAlign)
+      return AI->getAlignment();
+    AI->setAlignment(PrefAlign);
+    return PrefAlign;
+  }
+  }
+
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+    // If there is a large requested alignment and we can, bump up the alignment
+    // of the global.
+    if (GV->isDeclaration()) return Align;
+
+    if (GV->getAlignment() >= PrefAlign)
+      return GV->getAlignment();
+    // We can only increase the alignment of the global if it has no alignment
+    // specified or if it is not assigned a section.  If it is assigned a
+    // section, the global could be densely packed with other objects in the
+    // section, increasing the alignment could cause padding issues.
+    if (!GV->hasSection() || GV->getAlignment() == 0)
+      GV->setAlignment(PrefAlign);
+    return GV->getAlignment();
+  }
+
+  return Align;
+}
+
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+                                          const TargetData *TD) {
+  assert(V->getType()->isPointerTy() &&
+         "getOrEnforceKnownAlignment expects a pointer!");
+  unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+  unsigned TrailZ = KnownZero.countTrailingOnes();
+
+  // Avoid trouble with rediculously large TrailZ values, such as
+  // those computed from a null pointer.
+  TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+  // LLVM doesn't support alignments larger than this currently.
+  Align = std::min(Align, +Value::MaximumAlignment);
+
+  if (PrefAlign > Align)
+    Align = enforceKnownAlignment(V, Align, PrefAlign);
+
+  // We don't need to make any adjustment.
+  return Align;
+}
+
for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) @@ -184,9 +188,8 @@ ReprocessLoop: if (BI->isConditional()) { if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { - DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "; - WriteAsOperand(dbgs(), *I, false); - dbgs() << "\n"); + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " + << (*I)->getName() << "\n"); BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0)))); @@ -262,8 +265,9 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = PN->hasConstantValue(DT)) { + if (Value *V = SimplifyInstruction(PN, 0, DT)) { if (AA) AA->deleteValue(PN); + if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); } @@ -317,29 +321,22 @@ ReprocessLoop: if (!FoldBranchToCommonDest(BI)) continue; // Success. The block is now dead, so remove it from the loop, - // update the dominator tree and dominance frontier, and delete it. - - DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "; - WriteAsOperand(dbgs(), ExitingBlock, false); - dbgs() << "\n"); + // update the dominator tree and delete it. + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " + << ExitingBlock->getName() << "\n"); assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); - DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>(); DomTreeNode *Node = DT->getNode(ExitingBlock); const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = Node->getChildren(); while (!Children.empty()) { DomTreeNode *Child = Children.front(); DT->changeImmediateDominator(Child, Node->getIDom()); - if (DF) DF->changeImmediateDominator(Child->getBlock(), - Node->getIDom()->getBlock(), - DT); } DT->eraseNode(ExitingBlock); - if (DF) DF->removeBlock(ExitingBlock); BI->getSuccessor(0)->removePredecessor(ExitingBlock); BI->getSuccessor(1)->removePredecessor(ExitingBlock); @@ -378,9 +375,8 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), ".preheader", this); - DEBUG(dbgs() << "LoopSimplify: Creating pre-header "; - WriteAsOperand(dbgs(), NewBB, false); - dbgs() << "\n"); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName() + << "\n"); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -409,10 +405,8 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { LoopBlocks.size(), ".loopexit", this); - DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "; - WriteAsOperand(dbgs(), NewBB, false); - dbgs() << "\n"); - + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewBB->getName() << "\n"); return NewBB; } @@ -438,11 +432,11 @@ static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, /// FindPHIToPartitionLoops - The first part of loop-nestification is to find a /// PHI node that tells us how to partition the loops. static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, - AliasAnalysis *AA) { + AliasAnalysis *AA, LoopInfo *LI) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = PN->hasConstantValue(DT)) { + if (Value *V = SimplifyInstruction(PN, 0, DT)) { // This is a degenerate PHI already, don't modify it! 
PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -516,7 +510,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, /// created. /// Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { - PHINode *PN = FindPHIToPartitionLoops(L, DT, AA); + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); if (PN == 0) return 0; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This @@ -643,9 +637,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); - DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "; - WriteAsOperand(dbgs(), BEBlock, false); - dbgs() << "\n"); + DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " + << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; @@ -721,8 +714,6 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // Update dominator information DT->splitBlock(BEBlock); - if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) - DF->splitBlock(BEBlock); return BEBlock; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 236bbe9..7da7271 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -16,13 +16,14 @@ // // The process of unrolling can produce extraneous basic blocks linked with // unconditional branches. This will be corrected in the future. +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/Debug.h" @@ -30,20 +31,19 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" - using namespace llvm; // TODO: Should these be here or in LoopUnroll? STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); -STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); +STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); /// RemapInstruction - Convert the instruction operands from referencing the /// current values into those specified by VMap. static inline void RemapInstruction(Instruction *I, - ValueMap<const Value *, Value*> &VMap) { + ValueToValueMapTy &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); - ValueMap<const Value *, Value*>::iterator It = VMap.find(Op); + ValueToValueMapTy::iterator It = VMap.find(Op); if (It != VMap.end()) I->setOperand(op, It->second); } @@ -96,7 +96,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { } /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true -/// if unrolling was succesful, or false if the loop was unmodified. Unrolling +/// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. 
However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. @@ -105,7 +105,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. -bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) { +bool llvm::UnrollLoop(Loop *L, unsigned Count, + LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -127,6 +128,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } + + if (Header->hasAddressTaken()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + DEBUG(dbgs() << + " Won't unroll loop: address of header block is taken.\n"); + return false; + } // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. @@ -189,7 +197,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. - typedef ValueMap<const Value*, Value*> ValueToValueMapTy; ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { @@ -274,7 +281,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - RemapInstruction(I, LastValueMap); + ::RemapInstruction(I, LastValueMap); } // The latch block exits the loop. If there are any PHI nodes in the @@ -342,7 +349,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { - Term->setUnconditionalDest(Dest); + // Replace the conditional branch with an unconditional one. + BranchInst::Create(Dest, Term); + Term->eraseFromParent(); // Merge adjacent basic blocks, if possible. 
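For reference, the RemapInstruction helper earlier in this hunk just rewrites operand references through the clone map, leaving values defined outside the cloned region untouched. A toy, self-contained analogue (integer IDs stand in for Value pointers; all names invented):

#include <cstdio>
#include <map>

int main() {
  std::map<int, int> VMap = {{10, 110}, {11, 111}}; // original -> clone
  int Operands[3] = {10, 42, 11};   // 42 is defined outside the loop body
  for (int &Op : Operands) {
    std::map<int, int>::iterator It = VMap.find(Op);
    if (It != VMap.end())
      Op = It->second;              // remap only what was cloned
  }
  for (int Op : Operands)
    std::printf("%d ", Op);         // prints: 110 42 111
  return 0;
}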
if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) { std::replace(Latches.begin(), Latches.end(), Dest, Fold); @@ -362,10 +371,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); - else if (Constant *C = ConstantFoldInstruction(Inst)) { - Inst->replaceAllUsesWith(C); - (*BB)->getInstList().erase(Inst); - } + else if (Value *V = SimplifyInstruction(Inst)) + if (LI->replacementPreservesLCSSAForm(Inst, V)) { + Inst->replaceAllUsesWith(V); + (*BB)->getInstList().erase(Inst); + } } NumCompletelyUnrolled += CompletelyUnroll; diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index a46dd84..025ae0d 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -79,7 +79,9 @@ namespace { explicit LowerInvoke(const TargetLowering *tli = NULL, bool useExpensiveEHSupport = ExpensiveEHSupport) : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport), - TLI(tli) { } + TLI(tli) { + initializeLowerInvokePass(*PassRegistry::getPassRegistry()); + } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -102,7 +104,7 @@ namespace { char LowerInvoke::ID = 0; INITIALIZE_PASS(LowerInvoke, "lowerinvoke", "Lower invoke and unwind, for unwindless code generators", - false, false); + false, false) char &llvm::LowerInvokePassID = LowerInvoke::ID; @@ -148,19 +150,20 @@ bool LowerInvoke::doInitialization(Module &M) { "llvm.sjljeh.jblist"); } -// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>, -// so it looks like Intrinsic::_setjmp -#if defined(_MSC_VER) && defined(setjmp) -#define setjmp_undefined_for_visual_studio -#undef setjmp +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc #endif SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp); -#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio) -// let's return it to _setjmp state in case anyone ever needs it after this -// point under VisualStudio -#define setjmp _setjmp +#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) + // let's return it to _setjmp state +# pragma pop_macro("setjmp") +# undef setjmp_undefined_for_msvc #endif LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp); @@ -186,6 +189,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); II->replaceAllUsesWith(NewCall); // Insert an unconditional branch to the normal destination. @@ -266,6 +270,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); II->replaceAllUsesWith(NewCall); // Replace the invoke with an uncond branch. 
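The push_macro/pop_macro dance in the LowerInvoke hunk above is easy to get wrong, so a minimal standalone demonstration may help; the macro name here is made up:

#include <cstdio>

#define VALUE 1
int before() { return VALUE; }   // expands to 1
#pragma push_macro("VALUE")
#undef VALUE
int middle() { return 2; }       // VALUE is a free identifier in this region
#pragma pop_macro("VALUE")
int after() { return VALUE; }    // the macro is restored: 1 again

int main() {
  std::printf("%d %d %d\n", before(), middle(), after()); // prints: 1 2 1
  return 0;
}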
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 5530b47..914a439 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -33,7 +33,9 @@ namespace { class LowerSwitch : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - LowerSwitch() : FunctionPass(ID) {} + LowerSwitch() : FunctionPass(ID) { + initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); @@ -80,7 +82,7 @@ namespace { char LowerSwitch::ID = 0; INITIALIZE_PASS(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false); + "Lower SwitchInst's to branches", false, false) // Publically exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; @@ -107,7 +109,8 @@ bool LowerSwitch::runOnFunction(Function &F) { // operator<< - Used for debugging purposes. // static raw_ostream& operator<<(raw_ostream &O, - const LowerSwitch::CaseVector &C) ATTRIBUTE_USED; + const LowerSwitch::CaseVector &C) + LLVM_ATTRIBUTE_USED; static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) { O << "["; diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 101645b..f4ca81a 100644 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -27,18 +27,17 @@ STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { struct PromotePass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - PromotePass() : FunctionPass(ID) {} + PromotePass() : FunctionPass(ID) { + initializePromotePassPass(*PassRegistry::getPassRegistry()); + } // runOnFunction - To run this pass, first we calculate the alloca // instructions that are safe for promotion, then we promote each one. 
// virtual bool runOnFunction(Function &F); - // getAnalysisUsage - We need dominance frontiers - // virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); AU.setPreservesCFG(); // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); @@ -49,8 +48,11 @@ namespace { } // end of anonymous namespace char PromotePass::ID = 0; -INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register", - false, false); +INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", + false, false) bool PromotePass::runOnFunction(Function &F) { std::vector<AllocaInst*> Allocas; @@ -60,7 +62,6 @@ bool PromotePass::runOnFunction(Function &F) { bool Changed = false; DominatorTree &DT = getAnalysis<DominatorTree>(); - DominanceFrontier &DF = getAnalysis<DominanceFrontier>(); while (1) { Allocas.clear(); @@ -74,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF); + PromoteMemToReg(Allocas, DT); NumPromoted += Allocas.size(); Changed = true; } diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a4e3029..e6a4373 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -9,10 +9,19 @@ // // This file promotes memory references to be register references. It promotes // alloca instructions which only have loads and stores as uses. An alloca is -// transformed by using dominator frontiers to place PHI nodes, then traversing -// the function in depth-first order to rewrite loads and stores as appropriate. -// This is just the standard SSA construction algorithm to construct "pruned" -// SSA form. +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. +// +// The algorithm used here is based on: +// +// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. +// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of +// Programming Languages +// POPL '95. ACM, New York, NY, 62-73. +// +// It has been modified to not explicitly use the DJ graph data structure and to +// directly compute pruned SSA using per-variable liveness information. // //===----------------------------------------------------------------------===// @@ -24,9 +33,10 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" +#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -34,6 +44,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" #include <algorithm> +#include <map> +#include <queue> using namespace llvm; STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); @@ -178,7 +190,6 @@ namespace { /// std::vector<AllocaInst*> Allocas; DominatorTree &DT; - DominanceFrontier &DF; DIFactory *DIF; /// AST - An AliasSetTracker object to update. If null, don't update it. 
@@ -187,7 +198,7 @@ namespace { /// AllocaLookup - Reverse mapping of Allocas. /// - std::map<AllocaInst*, unsigned> AllocaLookup; + DenseMap<AllocaInst*, unsigned> AllocaLookup; /// NewPhiNodes - The PhiNodes we're adding. /// @@ -216,12 +227,15 @@ namespace { /// non-determinstic behavior. DenseMap<BasicBlock*, unsigned> BBNumbers; + /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. + DenseMap<DomTreeNode*, unsigned> DomLevels; + /// BBNumPreds - Lazily compute the number of predecessors a block has. DenseMap<const BasicBlock*, unsigned> BBNumPreds; public: PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, - DominanceFrontier &df, AliasSetTracker *ast) - : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} + AliasSetTracker *ast) + : Allocas(A), DT(dt), DIF(0), AST(ast) {} ~PromoteMem2Reg() { delete DIF; } @@ -264,13 +278,12 @@ namespace { void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, std::vector<RenamePassData> &Worklist); - bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, - SmallPtrSet<PHINode*, 16> &InsertedPHINodes); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); }; struct AllocaInfo { - std::vector<BasicBlock*> DefiningBlocks; - std::vector<BasicBlock*> UsingBlocks; + SmallVector<BasicBlock*, 32> DefiningBlocks; + SmallVector<BasicBlock*, 32> UsingBlocks; StoreInst *OnlyStore; BasicBlock *OnlyBlock; @@ -325,11 +338,19 @@ namespace { DbgDeclare = FindAllocaDbgDeclare(AI); } }; + + typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair; + + struct DomTreeNodeCompare { + bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { + return LHS.second < RHS.second; + } + }; } // end of anonymous namespace void PromoteMem2Reg::run() { - Function &F = *DF.getRoot()->getParent(); + Function &F = *DT.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); @@ -422,7 +443,26 @@ void PromoteMem2Reg::run() { continue; } } - + + // If we haven't computed dominator tree levels, do so now. + if (DomLevels.empty()) { + SmallVector<DomTreeNode*, 32> Worklist; + + DomTreeNode *Root = DT.getRootNode(); + DomLevels[Root] = 0; + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + unsigned ChildLevel = DomLevels[Node] + 1; + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); + CI != CE; ++CI) { + DomLevels[*CI] = ChildLevel; + Worklist.push_back(*CI); + } + } + } + // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { @@ -484,9 +524,8 @@ void PromoteMem2Reg::run() { Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in - // sections of dead code that were not processed on the dominance frontier. - // Just delete the users now. - // + // unreachable basic blocks that were not processed by walking the dominator + // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); @@ -509,9 +548,9 @@ void PromoteMem2Reg::run() { for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; - + // If this PHI node merges one value and/or undefs, get the value. 
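Stepping back to the DomLevels computation earlier in this hunk: it is a plain worklist walk that labels every dominator tree node with its depth. A self-contained miniature on an invented four-node tree:

#include <cstdio>
#include <vector>

int main() {
  // Children lists of a made-up dominator tree; node 0 is the root.
  std::vector<int> Children[4] = {{1, 2}, {3}, {}, {}};
  unsigned Level[4] = {0};
  std::vector<int> Worklist(1, 0);
  while (!Worklist.empty()) {
    int Node = Worklist.back();
    Worklist.pop_back();
    for (int C : Children[Node]) {
      Level[C] = Level[Node] + 1;   // a child is one level deeper
      Worklist.push_back(C);
    }
  }
  for (int N = 0; N != 4; ++N)
    std::printf("node %d at level %u\n", N, Level[N]); // levels 0, 1, 1, 2
  return 0;
}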
- if (Value *V = PN->hasConstantValue(&DT)) { + if (Value *V = SimplifyInstruction(PN, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -663,7 +702,6 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, /// avoiding insertion of dead phi nodes. void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, AllocaInfo &Info) { - // Unique the set of defining blocks for efficient lookup. SmallPtrSet<BasicBlock*, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); @@ -673,47 +711,78 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, SmallPtrSet<BasicBlock*, 32> LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); - // Compute the locations where PhiNodes need to be inserted. Look at the - // dominance frontier of EACH basic-block we have a write in. - unsigned CurrentVersion = 0; - SmallPtrSet<PHINode*, 16> InsertedPHINodes; - std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; - while (!Info.DefiningBlocks.empty()) { - BasicBlock *BB = Info.DefiningBlocks.back(); - Info.DefiningBlocks.pop_back(); - - // Look up the DF for this write, add it to defining blocks. - DominanceFrontier::const_iterator it = DF.find(BB); - if (it == DF.end()) continue; - - const DominanceFrontier::DomSetType &S = it->second; - - // In theory we don't need the indirection through the DFBlocks vector. - // In practice, the order of calling QueuePhiNode would depend on the - // (unspecified) ordering of basic blocks in the dominance frontier, - // which would give PHI nodes non-determinstic subscripts. Fix this by - // processing blocks in order of the occurance in the function. - for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), - PE = S.end(); P != PE; ++P) { - // If the frontier block is not in the live-in set for the alloca, don't - // bother processing it. - if (!LiveInBlocks.count(*P)) - continue; - - DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); - } - - // Sort by which the block ordering in the function. - if (DFBlocks.size() > 1) - std::sort(DFBlocks.begin(), DFBlocks.end()); - - for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { - BasicBlock *BB = DFBlocks[i].second; - if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes)) - Info.DefiningBlocks.push_back(BB); + // Use a priority queue keyed on dominator tree level so that inserted nodes + // are handled from the bottom of the dominator tree upwards. + typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, + DomTreeNodeCompare> IDFPriorityQueue; + IDFPriorityQueue PQ; + + for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(), + E = DefBlocks.end(); I != E; ++I) { + if (DomTreeNode *Node = DT.getNode(*I)) + PQ.push(std::make_pair(Node, DomLevels[Node])); + } + + SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks; + SmallPtrSet<DomTreeNode*, 32> Visited; + SmallVector<DomTreeNode*, 32> Worklist; + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNode *Root = RootPair.first; + unsigned RootLevel = RootPair.second; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. 
+ + Worklist.clear(); + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + BasicBlock *BB = Node->getBlock(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; + ++SI) { + DomTreeNode *SuccNode = DT.getNode(*SI); + + // Quickly skip all CFG edges that are also dominator tree edges instead + // of catching them below. + if (SuccNode->getIDom() == Node) + continue; + + unsigned SuccLevel = DomLevels[SuccNode]; + if (SuccLevel > RootLevel) + continue; + + if (!Visited.insert(SuccNode)) + continue; + + BasicBlock *SuccBB = SuccNode->getBlock(); + if (!LiveInBlocks.count(SuccBB)) + continue; + + DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); + if (!DefBlocks.count(SuccBB)) + PQ.push(std::make_pair(SuccNode, SuccLevel)); + } + + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; + ++CI) { + if (!Visited.count(*CI)) + Worklist.push_back(*CI); + } } - DFBlocks.clear(); } + + if (DFBlocks.size() > 1) + std::sort(DFBlocks.begin(), DFBlocks.end()); + + unsigned CurrentVersion = 0; + for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) + QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); } /// RewriteSingleStoreAlloca - If there is only a single store to this value, @@ -900,8 +969,7 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, // Alloca returns true if there wasn't already a phi-node for that variable // bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, - unsigned &Version, - SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { + unsigned &Version) { // Look up the basic-block in question. PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; @@ -916,8 +984,6 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; PN->reserveOperandSpace(getNumPreds(BB)); - - InsertedPHINodes.insert(PN); if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); @@ -986,7 +1052,7 @@ NextIteration: AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); if (!Src) continue; - std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); + DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); if (AI == AllocaLookup.end()) continue; Value *V = IncomingVals[AI->second]; @@ -1002,7 +1068,7 @@ NextIteration: AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); if (!Dest) continue; - std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); + DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); if (ai == AllocaLookup.end()) continue; @@ -1036,18 +1102,17 @@ NextIteration: } /// PromoteMemToReg - Promote the specified list of alloca instructions into -/// scalar registers, inserting PHI nodes as appropriate. This function makes -/// use of DominanceFrontier information. This function does not modify the CFG -/// of the function at all. All allocas must be from the same function. +/// scalar registers, inserting PHI nodes as appropriate. This function does +/// not modify the CFG of the function at all. All allocas must be from the +/// same function. /// /// If AST is specified, the specified tracker is updated to reflect changes /// made to the IR. /// void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, - DominatorTree &DT, DominanceFrontier &DF, - AliasSetTracker *AST) { + DominatorTree &DT, AliasSetTracker *AST) { // If there is nothing to do, bail out... 
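The priority-queue IDF walk in DetermineInsertionPoint above can be traced end to end on a diamond CFG. A hypothetical, self-contained miniature (node numbering, dominator levels, and edges are all invented; the subtree walk and live-in pruning are elided because this toy tree has no children below the definition blocks):

#include <cstdio>
#include <queue>
#include <set>
#include <utility>
#include <vector>

int main() {
  // 0 = entry, 1 = then, 2 = else, 3 = merge; entry dominates everything.
  int IDom[4] = {0, 0, 0, 0};
  int Level[4] = {0, 1, 1, 1};
  std::vector<int> Succ[4] = {{1, 2}, {3}, {3}, {}};

  std::set<int> DefBlocks = {1, 2};             // a store in each arm
  std::priority_queue<std::pair<int, int> > PQ; // (level, node), max-heap
  for (int D : DefBlocks)
    PQ.push(std::make_pair(Level[D], D));

  std::set<int> Visited, IDF;
  while (!PQ.empty()) {
    int RootLevel = PQ.top().first, Root = PQ.top().second;
    PQ.pop();
    for (int S : Succ[Root]) {
      if (IDom[S] == Root) continue;            // dominator tree edge, skip
      if (Level[S] > RootLevel) continue;       // strictly deeper, skip
      if (!Visited.insert(S).second) continue;
      IDF.insert(S);                            // a PHI is needed here
      if (!DefBlocks.count(S))
        PQ.push(std::make_pair(Level[S], S));
    }
  }
  for (int N : IDF)
    std::printf("phi at node %d\n", N);         // prints: phi at node 3
  return 0;
}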
if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, DF, AST).run(); + PromoteMem2Reg(Allocas, DT, AST).run(); } diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index c855988..3896d98 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "ssaupdater" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CFG.h" @@ -178,9 +179,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. - if (Value *ConstVal = InsertedPHI->hasConstantValue()) { + if (Value *V = SimplifyInstruction(InsertedPHI)) { InsertedPHI->eraseFromParent(); - return ConstVal; + return V; } // If the client wants to know about all new instructions, tell it. @@ -342,3 +343,169 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); return Impl.GetValue(BB); } + +//===----------------------------------------------------------------------===// +// LoadAndStorePromoter Implementation +//===----------------------------------------------------------------------===// + +LoadAndStorePromoter:: +LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts, + SSAUpdater &S, StringRef BaseName) : SSA(S) { + if (Insts.empty()) return; + + Value *SomeVal; + if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) + SomeVal = LI; + else + SomeVal = cast<StoreInst>(Insts[0])->getOperand(0); + + if (BaseName.empty()) + BaseName = SomeVal->getName(); + SSA.Initialize(SomeVal->getType(), BaseName); +} + + +void LoadAndStorePromoter:: +run(const SmallVectorImpl<Instruction*> &Insts) const { + + // First step: bucket up uses of the alloca by the block they occur in. + // This is important because we have to handle multiple defs/uses in a block + // ourselves: SSAUpdater is purely for cross-block references. + // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element. + DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + UsesByBlock[User->getParent()].push_back(User); + } + + // Okay, now we can iterate over all the blocks in the function with uses, + // processing them. Keep track of which loads are loading a live-in value. + // Walk the uses in the use-list order to be deterministic. + SmallVector<LoadInst*, 32> LiveInLoads; + DenseMap<Value*, Value*> ReplacedLoads; + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + BasicBlock *BB = User->getParent(); + std::vector<Instruction*> &BlockUses = UsesByBlock[BB]; + + // If this block has already been processed, ignore this repeat use. + if (BlockUses.empty()) continue; + + // Okay, this is the first use in the block. If this block just has a + // single user in it, we can rewrite it trivially. + if (BlockUses.size() == 1) { + // If it is a store, it is a trivial def of the value in the block. + if (StoreInst *SI = dyn_cast<StoreInst>(User)) + SSA.AddAvailableValue(BB, SI->getOperand(0)); + else + // Otherwise it is a load, queue it to rewrite as a live-in load. 
+ LiveInLoads.push_back(cast<LoadInst>(User)); + BlockUses.clear(); + continue; + } + + // Otherwise, check to see if this block is all loads. + bool HasStore = false; + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { + if (isa<StoreInst>(BlockUses[i])) { + HasStore = true; + break; + } + } + + // If so, we can queue them all as live in loads. We don't have an + // efficient way to tell which one is first in the block and don't want to + // scan large blocks, so just add all loads as live ins. + if (!HasStore) { + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) + LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); + BlockUses.clear(); + continue; + } + + // Otherwise, we have mixed loads and stores (or just a bunch of stores). + // Since SSAUpdater is purely for cross-block values, we need to determine + // the order of these instructions in the block. If the first use in the + // block is a load, then it uses the live in value. The last store defines + // the live out value. We handle this by doing a linear scan of the block. + Value *StoredValue = 0; + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (LoadInst *L = dyn_cast<LoadInst>(II)) { + // If this is a load from an unrelated pointer, ignore it. + if (!isInstInList(L, Insts)) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + replaceLoadWithValue(L, StoredValue); + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *S = dyn_cast<StoreInst>(II)) { + // If this is a store to an unrelated pointer, ignore it. + if (!isInstInList(S, Insts)) continue; + + // Remember that this is the active value in the block. + StoredValue = S->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { + LoadInst *ALoad = LiveInLoads[i]; + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + replaceLoadWithValue(ALoad, NewVal); + + // Avoid assertions in unreachable code. + if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType()); + ALoad->replaceAllUsesWith(NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // Allow the client to do stuff before we start nuking things. + doExtraRewritesBeforeFinalDeletion(); + + // Now that everything is rewritten, delete the old instructions from the + // function. They should all be dead now. + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdater data structure for a block (e.g. because + // the loaded value was stored later). In this case, we need to recursively + // propagate the updates until we get to the real value. + if (!User->use_empty()) { + Value *NewVal = ReplacedLoads[User]; + assert(NewVal && "not a replaced load?"); + + // Propagate down to the ultimate replacee. The intermediate loads + // could theoretically already have been deleted, so we don't want to + // dereference the Value*'s. 
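The deletion loop that follows has to chase ReplacedLoads transitively; the shape of that chase, reduced to a map of plain integers (all values invented):

#include <cstdio>
#include <map>

int main() {
  // Load 1 was replaced by load 2, which was in turn replaced by value 3.
  std::map<int, int> ReplacedLoads = {{1, 2}, {2, 3}};
  int V = 1;
  std::map<int, int>::iterator It = ReplacedLoads.find(V);
  while (It != ReplacedLoads.end()) {
    V = It->second;                 // step to the replacement
    It = ReplacedLoads.find(V);     // and see if it was itself replaced
  }
  std::printf("ultimate value: %d\n", V); // prints 3
  return 0;
}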
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); + while (RLI != ReplacedLoads.end()) { + NewVal = RLI->second; + RLI = ReplacedLoads.find(NewVal); + } + + replaceLoadWithValue(cast<LoadInst>(User), NewVal); + User->replaceAllUsesWith(NewVal); + } + + instructionDeleted(User); + User->eraseFromParent(); + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 28d7afb..fb660db 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -19,33 +19,34 @@ #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <functional> #include <set> #include <map> using namespace llvm; +static cl::opt<bool> +DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), + cl::desc("Duplicate return instructions into unconditional branches")); + STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { class SimplifyCFGOpt { const TargetData *const TD; - ConstantInt *GetConstantInt(Value *V); - Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values); - Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values); - bool GatherValueComparisons(Instruction *Cond, Value *&CompVal, - std::vector<ConstantInt*> &Values); Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases); @@ -53,6 +54,14 @@ class SimplifyCFGOpt { BasicBlock *Pred); bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI); + bool SimplifyReturn(ReturnInst *RI); + bool SimplifyUnwind(UnwindInst *UI); + bool SimplifyUnreachable(UnreachableInst *UI); + bool SimplifySwitch(SwitchInst *SI); + bool SimplifyIndirectBr(IndirectBrInst *IBI); + bool SimplifyUncondBranch(BranchInst *BI); + bool SimplifyCondBranch(BranchInst *BI); + public: explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} bool run(BasicBlock *BB); @@ -91,8 +100,6 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { /// ExistPred, an existing predecessor of Succ. 
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { - assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) != - succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!"); if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do PHINode *PN; @@ -102,28 +109,29 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, } -/// GetIfCondition - Given a basic block (BB) with two predecessors (and -/// presumably PHI nodes in it), check to see if the merge at this block is due +/// GetIfCondition - Given a basic block (BB) with two predecessors (and at +/// least one PHI node in it), check to see if the merge at this block is due /// to an "if condition". If so, return the boolean condition that determines /// which entry into BB will be taken. Also, return by references the block /// that will be entered from if the condition is true, and the block that will /// be entered if the condition is false. /// -/// -static Value *GetIfCondition(BasicBlock *BB, - BasicBlock *&IfTrue, BasicBlock *&IfFalse) { - assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 && +/// This does no checking to see if the true/false blocks have large or unsavory +/// instructions in them. +static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = cast<PHINode>(BB->begin()); + assert(SomePHI->getNumIncomingValues() == 2 && "Function can only handle blocks with 2 predecessors!"); - BasicBlock *Pred1 = *pred_begin(BB); - BasicBlock *Pred2 = *++pred_begin(BB); + BasicBlock *Pred1 = SomePHI->getIncomingBlock(0); + BasicBlock *Pred2 = SomePHI->getIncomingBlock(1); // We can only handle branches. Other control flow will be lowered to // branches if possible anyway. - if (!isa<BranchInst>(Pred1->getTerminator()) || - !isa<BranchInst>(Pred2->getTerminator())) + BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (Pred1Br == 0 || Pred2Br == 0) return 0; - BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator()); - BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator()); // Eliminate code duplication by ensuring that Pred1Br is conditional if // either are. @@ -140,6 +148,12 @@ static Value *GetIfCondition(BasicBlock *BB, } if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (Pred2->getSinglePredecessor() == 0) + return 0; + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -156,39 +170,29 @@ static Value *GetIfCondition(BasicBlock *BB, return 0; } - // The only thing we have to watch out for here is to make sure that Pred2 - // doesn't have incoming edges from other blocks. If it does, the condition - // doesn't dominate BB. - if (++pred_begin(Pred2) != pred_end(Pred2)) - return 0; - return Pred1Br->getCondition(); } // Ok, if we got here, both predecessors end with an unconditional branch to // BB. Don't panic! If both blocks only have a single (identical) // predecessor, and THAT is a conditional branch, then we're all ok! 
- if (pred_begin(Pred1) == pred_end(Pred1) || - ++pred_begin(Pred1) != pred_end(Pred1) || - pred_begin(Pred2) == pred_end(Pred2) || - ++pred_begin(Pred2) != pred_end(Pred2) || - *pred_begin(Pred1) != *pred_begin(Pred2)) + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) return 0; // Otherwise, if this is a conditional branch, then we can use it! - BasicBlock *CommonPred = *pred_begin(Pred1); - if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) { - assert(BI->isConditional() && "Two successors but not conditional?"); - if (BI->getSuccessor(0) == Pred1) { - IfTrue = Pred1; - IfFalse = Pred2; - } else { - IfTrue = Pred2; - IfFalse = Pred1; - } - return BI->getCondition(); + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (BI == 0) return 0; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; } - return 0; + return BI->getCondition(); } /// DominatesMergePoint - If we have a merge point of an "if condition" as @@ -201,7 +205,7 @@ static Value *GetIfCondition(BasicBlock *BB, /// non-trapping. If both are true, the instruction is inserted into the set /// and true is returned. static bool DominatesMergePoint(Value *V, BasicBlock *BB, - std::set<Instruction*> *AggressiveInsts) { + SmallPtrSet<Instruction*, 4> *AggressiveInsts) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -219,56 +223,55 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If this instruction is defined in a block that contains an unconditional // branch to BB, then it must be in the 'conditional' part of the "if - // statement". - if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator())) - if (BI->isUnconditional() && BI->getSuccessor(0) == BB) { - if (!AggressiveInsts) return false; - // Okay, it looks like the instruction IS in the "condition". Check to - // see if it's a cheap instruction to unconditionally compute, and if it - // only uses stuff defined outside of the condition. If so, hoist it out. - if (!I->isSafeToSpeculativelyExecute()) - return false; + // statement". If not, it definitely dominates the region. + BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); + if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB) + return true; - switch (I->getOpcode()) { - default: return false; // Cannot hoist this out safely. - case Instruction::Load: { - // We have to check to make sure there are no instructions before the - // load in its basic block, as we are going to hoist the loop out to - // its predecessor. - BasicBlock::iterator IP = PBB->begin(); - while (isa<DbgInfoIntrinsic>(IP)) - IP++; - if (IP != BasicBlock::iterator(I)) - return false; - break; - } - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::ICmp: - break; // These are all cheap and non-trapping instructions. - } + // If we aren't allowing aggressive promotion anymore, then don't consider + // instructions in the 'if region'. + if (AggressiveInsts == 0) return false; + + // Okay, it looks like the instruction IS in the "condition". 
Check to + // see if it's a cheap instruction to unconditionally compute, and if it + // only uses stuff defined outside of the condition. If so, hoist it out. + if (!I->isSafeToSpeculativelyExecute()) + return false; - // Okay, we can only really hoist these out if their operands are not - // defined in the conditional region. - for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, 0)) - return false; - // Okay, it's safe to do this! Remember this instruction. - AggressiveInsts->insert(I); - } + switch (I->getOpcode()) { + default: return false; // Cannot hoist this out safely. + case Instruction::Load: + // We have to check to make sure there are no instructions before the + // load in its basic block, as we are going to hoist the load out to its + // predecessor. + if (PBB->getFirstNonPHIOrDbg() != I) + return false; + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + break; // These are all cheap and non-trapping instructions. + } + // Okay, we can only really hoist these out if their operands are not + // defined in the conditional region. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (!DominatesMergePoint(*i, BB, 0)) + return false; + // Okay, it's safe to do this! Remember this instruction. + AggressiveInsts->insert(I); return true; } /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) { +static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) @@ -296,77 +299,94 @@ ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) { return 0; } -/// GatherConstantSetEQs - Given a potentially 'or'd together collection of -/// icmp_eq instructions that compare a value against a constant, return the -/// value being compared, and stick the constant into the Values vector. -Value *SimplifyCFGOpt:: -GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values) { - if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Inst->getOpcode() == Instruction::ICmp && - cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) { - if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) { - Values.push_back(C); - return Inst->getOperand(0); - } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) { - Values.push_back(C); - return Inst->getOperand(1); +/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together +/// collection of icmp eq/ne instructions that compare a value against a +/// constant, return the value being compared, and stick the constant into the +/// Values vector. +static Value * +GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, + const TargetData *TD, bool isEQ, unsigned &UsedICmps) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return 0; + + // If this is an icmp against a constant, handle this as one of the cases. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { + if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { + if (ICI->getPredicate() == (isEQ ? 
ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) { + UsedICmps++; + Vals.push_back(C); + return I->getOperand(0); } - } else if (Inst->getOpcode() == Instruction::Or) { - if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values)) - if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values)) - if (LHS == RHS) - return LHS; + + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to + // the set. + ConstantRange Span = + ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); + + // If this is an and/!= check then we want to optimize "x ugt 2" into + // x != 0 && x != 1. + if (!isEQ) + Span = Span.inverse(); + + // If there are a ton of values, we don't want to make a ginormous switch. + if (Span.getSetSize().ugt(8) || Span.isEmptySet() || + // We don't handle wrapped sets yet. + Span.isWrappedSet()) + return 0; + + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) + Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); + UsedICmps++; + return I->getOperand(0); } + return 0; } - return 0; -} + + // Otherwise, we can only handle an | or &, depending on isEQ. + if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) + return 0; + + unsigned NumValsBeforeLHS = Vals.size(); + unsigned UsedICmpsBeforeLHS = UsedICmps; + if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, + isEQ, UsedICmps)) { + unsigned NumVals = Vals.size(); + unsigned UsedICmpsBeforeRHS = UsedICmps; + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + isEQ, UsedICmps)) { + if (LHS == RHS) + return LHS; + Vals.resize(NumVals); + UsedICmps = UsedICmpsBeforeRHS; + } -/// GatherConstantSetNEs - Given a potentially 'and'd together collection of -/// setne instructions that compare a value against a constant, return the value -/// being compared, and stick the constant into the Values vector. -Value *SimplifyCFGOpt:: -GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values) { - if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Inst->getOpcode() == Instruction::ICmp && - cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) { - if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) { - Values.push_back(C); - return Inst->getOperand(0); - } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) { - Values.push_back(C); - return Inst->getOperand(1); - } - } else if (Inst->getOpcode() == Instruction::And) { - if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values)) - if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values)) - if (LHS == RHS) - return LHS; + // The RHS of the or/and can't be folded in and we haven't used "Extra" yet, + // set it and return success. + if (Extra == 0 || Extra == I->getOperand(1)) { + Extra = I->getOperand(1); + return LHS; } + + Vals.resize(NumValsBeforeLHS); + UsedICmps = UsedICmpsBeforeLHS; + return 0; } - return 0; -} - -/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a -/// bunch of comparisons of one value against constants, return the value and -/// the constants being compared. -bool SimplifyCFGOpt::GatherValueComparisons(Instruction *Cond, Value *&CompVal, - std::vector<ConstantInt*> &Values) { - if (Cond->getOpcode() == Instruction::Or) { - CompVal = GatherConstantSetEQs(Cond, Values); - - // Return true to indicate that the condition is true if the CompVal is - // equal to one of the constants. 
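The ConstantRange expansion in the hunk above is the interesting step: a comparison like "x ult 3" becomes the case set {0, 1, 2}, and under an and/!= chain the region is inverted first, so "x ugt 2" contributes the same three values as excluded cases. The enumeration itself is a half-open interval walk; a plain-integer sketch:

#include <cstdio>

int main() {
  // Region where "x ult 3" holds, as a half-open interval [Lo, Hi).
  unsigned Lo = 0, Hi = 3;
  for (unsigned V = Lo; V != Hi; ++V)   // same loop shape as the APInt walk
    std::printf("case %u\n", V);        // cases 0, 1, 2
  return 0;
}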
- return true; - } else if (Cond->getOpcode() == Instruction::And) { - CompVal = GatherConstantSetNEs(Cond, Values); - - // Return false to indicate that the condition is false if the CompVal is - // equal to one of the constants. - return false; + + // If the LHS can't be folded in, but Extra is available and RHS can, try to + // use LHS as Extra. + if (Extra == 0 || Extra == I->getOperand(0)) { + Value *OldExtra = Extra; + Extra = I->getOperand(0); + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + isEQ, UsedICmps)) + return RHS; + assert(Vals.size() == NumValsBeforeLHS); + Extra = OldExtra; } - return false; + + return 0; } - + static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { Instruction* Cond = 0; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { @@ -374,6 +394,8 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) Cond = dyn_cast<Instruction>(BI->getCondition()); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) { + Cond = dyn_cast<Instruction>(IBI->getAddress()); } TI->eraseFromParent(); @@ -395,7 +417,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || ICI->getPredicate() == ICmpInst::ICMP_NE) && - GetConstantInt(ICI->getOperand(1))) + GetConstantInt(ICI->getOperand(1), TD)) CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. @@ -420,7 +442,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, BranchInst *BI = cast<BranchInst>(TI); ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); - Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1)), + Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD), BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE))); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); @@ -459,8 +481,8 @@ ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1, } // Otherwise, just sort both lists and compare element by element. - std::sort(V1->begin(), V1->end()); - std::sort(V2->begin(), V2->end()); + array_pod_sort(V1->begin(), V1->end()); + array_pod_sort(V2->begin(), V2->end()); unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); while (i1 != e1 && i2 != e2) { if ((*V1)[i1].first == (*V2)[i2].first) @@ -506,90 +528,87 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // If we are here, we know that the value is none of those cases listed in // PredCases. If there are any cases in ThisCases that are in PredCases, we // can simplify TI. - if (ValuesOverlap(PredCases, ThisCases)) { - if (isa<BranchInst>(TI)) { - // Okay, one of the successors of this condbr is dead. Convert it to a - // uncond br. - assert(ThisCases.size() == 1 && "Branch can only have one case!"); - // Insert the new branch. - Instruction *NI = BranchInst::Create(ThisDef, TI); - (void) NI; - - // Remove PHI node entries for the dead edge. - ThisCases[0].second->removePredecessor(TI->getParent()); - - DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); - - EraseTerminatorInstAndDCECond(TI); - return true; - - } else { - SwitchInst *SI = cast<SwitchInst>(TI); - // Okay, TI has cases that are statically dead, prune them away. 
-      SmallPtrSet<Constant*, 16> DeadCases;
-      for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
-        DeadCases.insert(PredCases[i].first);
+  if (!ValuesOverlap(PredCases, ThisCases))
+    return false;
+
+  if (isa<BranchInst>(TI)) {
+    // Okay, one of the successors of this condbr is dead.  Convert it to an
+    // uncond br.
+    assert(ThisCases.size() == 1 && "Branch can only have one case!");
+    // Insert the new branch.
+    Instruction *NI = BranchInst::Create(ThisDef, TI);
+    (void) NI;

-      DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
-            << "Through successor TI: " << *TI);
+    // Remove PHI node entries for the dead edge.
+    ThisCases[0].second->removePredecessor(TI->getParent());

-      for (unsigned i = SI->getNumCases()-1; i != 0; --i)
-        if (DeadCases.count(SI->getCaseValue(i))) {
-          SI->getSuccessor(i)->removePredecessor(TI->getParent());
-          SI->removeCase(i);
-        }
+    DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+          << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");

-      DEBUG(dbgs() << "Leaving: " << *TI << "\n");
-      return true;
-    }
+    EraseTerminatorInstAndDCECond(TI);
+    return true;
   }
-
-  } else {
-    // Otherwise, TI's block must correspond to some matched value.  Find out
-    // which value (or set of values) this is.
-    ConstantInt *TIV = 0;
-    BasicBlock *TIBB = TI->getParent();
+
+  SwitchInst *SI = cast<SwitchInst>(TI);
+  // Okay, TI has cases that are statically dead, prune them away.
+  SmallPtrSet<Constant*, 16> DeadCases;
   for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
-    if (PredCases[i].second == TIBB) {
-      if (TIV == 0)
-        TIV = PredCases[i].first;
-      else
-        return false;  // Cannot handle multiple values coming to this block.
-    }
-  assert(TIV && "No edge from pred to succ?");
-
-  // Okay, we found the one constant that our value can be if we get into TI's
-  // BB.  Find out which successor will unconditionally be branched to.
-  BasicBlock *TheRealDest = 0;
-  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
-    if (ThisCases[i].first == TIV) {
-      TheRealDest = ThisCases[i].second;
-      break;
+    DeadCases.insert(PredCases[i].first);
+
+  DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+               << "Through successor TI: " << *TI);
+
+  for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+    if (DeadCases.count(SI->getCaseValue(i))) {
+      SI->getSuccessor(i)->removePredecessor(TI->getParent());
+      SI->removeCase(i);
     }
-    // If not handled by any explicit cases, it is handled by the default case.
-    if (TheRealDest == 0) TheRealDest = ThisDef;

+  DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+  return true;
+  }
+
+  // Otherwise, TI's block must correspond to some matched value.  Find out
+  // which value (or set of values) this is.
+  ConstantInt *TIV = 0;
+  BasicBlock *TIBB = TI->getParent();
+  for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+    if (PredCases[i].second == TIBB) {
+      if (TIV != 0)
+        return false;  // Cannot handle multiple values coming to this block.
+      TIV = PredCases[i].first;
+    }
+  assert(TIV && "No edge from pred to succ?");
+
+  // Okay, we found the one constant that our value can be if we get into TI's
+  // BB.  Find out which successor will unconditionally be branched to.
+  BasicBlock *TheRealDest = 0;
+  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+    if (ThisCases[i].first == TIV) {
+      TheRealDest = ThisCases[i].second;
+      break;
+    }

-    // Remove PHI node entries for dead edges. 
- BasicBlock *CheckEdge = TheRealDest; - for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI) - if (*SI != CheckEdge) - (*SI)->removePredecessor(TIBB); - else - CheckEdge = 0; + // If not handled by any explicit cases, it is handled by the default case. + if (TheRealDest == 0) TheRealDest = ThisDef; - // Insert the new branch. - Instruction *NI = BranchInst::Create(TheRealDest, TI); - (void) NI; + // Remove PHI node entries for dead edges. + BasicBlock *CheckEdge = TheRealDest; + for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI) + if (*SI != CheckEdge) + (*SI)->removePredecessor(TIBB); + else + CheckEdge = 0; - DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + // Insert the new branch. + Instruction *NI = BranchInst::Create(TheRealDest, TI); + (void) NI; - EraseTerminatorInstAndDCECond(TI); - return true; - } - return false; + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + + EraseTerminatorInstAndDCECond(TI); + return true; } namespace { @@ -603,6 +622,16 @@ namespace { }; } +static int ConstantIntSortPredicate(const void *P1, const void *P2) { + const ConstantInt *LHS = *(const ConstantInt**)P1; + const ConstantInt *RHS = *(const ConstantInt**)P2; + if (LHS->getValue().ult(RHS->getValue())) + return 1; + if (LHS->getValue() == RHS->getValue()) + return 0; + return -1; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons @@ -798,7 +827,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { if (!I2->use_empty()) I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); - BB2->getInstList().erase(I2); + I2->eraseFromParent(); I1 = BB1_Itr++; while (isa<DbgInfoIntrinsic>(I1)) @@ -836,18 +865,18 @@ HoistTerminator: (PN = dyn_cast<PHINode>(BBI)); ++BBI) { Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); - if (BB1V != BB2V) { - // These values do not agree. Insert a select instruction before NT - // that determines the right value. - SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) - SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V, - BB1V->getName()+"."+BB2V->getName(), NT); - // Make the PHI node use the select for all incoming values for BB1/BB2 - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) - PN->setIncomingValue(i, SI); - } + if (BB1V == BB2V) continue; + + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (SI == 0) + SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V, + BB1V->getName()+"."+BB2V->getName(), NT); + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) + PN->setIncomingValue(i, SI); } } @@ -872,21 +901,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { BBI != BBE; ++BBI) { Instruction *I = BBI; // Skip debug info. 
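// The net effect of SpeculativelyExecuteBB, sketched on hypothetical IR:
//   br i1 %c, label %then, label %end      ; %then holds one cheap instr
//   then:  %v = add i32 %a, 1
// becomes, roughly,
//   %v = add i32 %a, 1
//   %sel = select i1 %c, i32 %v, i32 %a0   ; feeds the PHI in %end
// Only a single real instruction is speculated, and FP compares are
// skipped below because the resulting FP select is often expensive.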
- if (isa<DbgInfoIntrinsic>(I)) continue; - if (I == Term) break; + if (isa<DbgInfoIntrinsic>(I)) continue; + if (I == Term) break; - if (!HInst) - HInst = I; - else + if (HInst) return false; + HInst = I; } if (!HInst) return false; // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); - if (isa<Instruction>(BrCond) && - cast<Instruction>(BrCond)->getOpcode() == Instruction::FCmp) + if (isa<FCmpInst>(BrCond)) return false; // If BB1 is actually on the false edge of the conditional branch, remember @@ -990,12 +1017,12 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end(); UI != UE; ++UI) { Instruction *Use = cast<Instruction>(*UI); - if (BB1Insns.count(Use)) { - // If BrCond uses the instruction that place it just before - // branch instruction. - InsertPos = BI; - break; - } + if (!BB1Insns.count(Use)) continue; + + // If BrCond uses the instruction that place it just before + // branch instruction. + InsertPos = BI; + break; } } else InsertPos = BI; @@ -1016,8 +1043,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) { PHINode *PN = PHIUses[i]; for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j) - if (PN->getIncomingBlock(j) == BB1 || - PN->getIncomingBlock(j) == BIParent) + if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent) PN->setIncomingValue(j, SI); } @@ -1055,7 +1081,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -1075,78 +1101,73 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - ConstantInt *CB; - if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType()->isIntegerTy(1)) { - // Okay, we now know that all edges from PredBB should be revectored to - // branch to RealDest. - BasicBlock *PredBB = PN->getIncomingBlock(i); - BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); + if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; + + // Okay, we now know that all edges from PredBB should be revectored to + // branch to RealDest. + BasicBlock *PredBB = PN->getIncomingBlock(i); + BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + + if (RealDest == BB) continue; // Skip self loops. + + // The dest block might have PHI nodes, other predecessors and other + // difficult cases. Instead of being smart about this, just insert a new + // block that jumps to the destination block, effectively splitting + // the edge we are about to create. + BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), + RealDest->getName()+".critedge", + RealDest->getParent(), RealDest); + BranchInst::Create(RealDest, EdgeBB); + + // Update PHI nodes. 
+ AddPredecessorToBlock(RealDest, EdgeBB, BB); + + // BB may have instructions that are being threaded over. Clone these + // instructions into EdgeBB. We know that there will be no uses of the + // cloned instructions outside of EdgeBB. + BasicBlock::iterator InsertPt = EdgeBB->begin(); + DenseMap<Value*, Value*> TranslateMap; // Track translated values. + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); + continue; + } + // Clone the instruction. + Instruction *N = BBI->clone(); + if (BBI->hasName()) N->setName(BBI->getName()+".c"); - if (RealDest == BB) continue; // Skip self loops. + // Update operands due to translation. + for (User::op_iterator i = N->op_begin(), e = N->op_end(); + i != e; ++i) { + DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i); + if (PI != TranslateMap.end()) + *i = PI->second; + } - // The dest block might have PHI nodes, other predecessors and other - // difficult cases. Instead of being smart about this, just insert a new - // block that jumps to the destination block, effectively splitting - // the edge we are about to create. - BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), - RealDest->getName()+".critedge", - RealDest->getParent(), RealDest); - BranchInst::Create(RealDest, EdgeBB); - PHINode *PN; - for (BasicBlock::iterator BBI = RealDest->begin(); - (PN = dyn_cast<PHINode>(BBI)); ++BBI) { - Value *V = PN->getIncomingValueForBlock(BB); - PN->addIncoming(V, EdgeBB); + // Check for trivial simplification. + if (Value *V = SimplifyInstruction(N, TD)) { + TranslateMap[BBI] = V; + delete N; // Instruction folded away, don't need actual inst + } else { + // Insert the new instruction into its new home. + EdgeBB->getInstList().insert(InsertPt, N); + if (!BBI->use_empty()) + TranslateMap[BBI] = N; } + } - // BB may have instructions that are being threaded over. Clone these - // instructions into EdgeBB. We know that there will be no uses of the - // cloned instructions outside of EdgeBB. - BasicBlock::iterator InsertPt = EdgeBB->begin(); - std::map<Value*, Value*> TranslateMap; // Track translated values. - for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { - if (PHINode *PN = dyn_cast<PHINode>(BBI)) { - TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); - } else { - // Clone the instruction. - Instruction *N = BBI->clone(); - if (BBI->hasName()) N->setName(BBI->getName()+".c"); - - // Update operands due to translation. - for (User::op_iterator i = N->op_begin(), e = N->op_end(); - i != e; ++i) { - std::map<Value*, Value*>::iterator PI = - TranslateMap.find(*i); - if (PI != TranslateMap.end()) - *i = PI->second; - } - - // Check for trivial simplification. - if (Constant *C = ConstantFoldInstruction(N)) { - TranslateMap[BBI] = C; - delete N; // Constant folded away, don't need actual inst - } else { - // Insert the new instruction into its new home. - EdgeBB->getInstList().insert(InsertPt, N); - if (!BBI->use_empty()) - TranslateMap[BBI] = N; - } - } + // Loop over all of the edges from PredBB to BB, changing them to branch + // to EdgeBB instead. + TerminatorInst *PredBBTI = PredBB->getTerminator(); + for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) + if (PredBBTI->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB); + PredBBTI->setSuccessor(i, EdgeBB); } - - // Loop over all of the edges from PredBB to BB, changing them to branch - // to EdgeBB instead. 
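// A sketch of the threading done here (hypothetical IR, for exposition
// only): with
//   bb:   %p = phi i1 [ true, %pred ], [ %q, %other ]
//         br i1 %p, label %t, label %f
// the %pred -> %bb edge is redirected through a fresh block %t.critedge
// that branches straight to %t, and any non-PHI instructions of %bb are
// cloned into it (or folded away via SimplifyInstruction when possible).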
-      TerminatorInst *PredBBTI = PredBB->getTerminator();
-      for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
-        if (PredBBTI->getSuccessor(i) == BB) {
-          BB->removePredecessor(PredBB);
-          PredBBTI->setSuccessor(i, EdgeBB);
-        }
-
-      // Recurse, simplifying any other constants.
-      return FoldCondBranchOnPHI(BI) | true;
-    }
+
+    // Recurse, simplifying any other constants.
+    return FoldCondBranchOnPHI(BI, TD) | true;
   }

   return false;
@@ -1154,18 +1175,20 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {

 /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
 /// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
   // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
   // statement", which has a very simple dominance structure.  Basically, we
   // are trying to find the condition that is being branched on, which
   // subsequently causes this merge to happen.  We really want control
   // dependence information for this check, but simplifycfg can't keep it up
   // to date, and this catches most of the cases we care about anyway.
-  //
   BasicBlock *BB = PN->getParent();
   BasicBlock *IfTrue, *IfFalse;
   Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
-  if (!IfCond) return false;
+  if (!IfCond ||
+      // Don't bother if the branch will be constant folded trivially.
+      isa<ConstantInt>(IfCond))
+    return false;

   // Okay, we found that we can merge this two-entry phi node into a select.
   // Doing so would require us to fold *all* two entry phi nodes in this block.
@@ -1177,42 +1200,49 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
   if (NumPhis > 2)
     return false;

-  DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
-        << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");
-
   // Loop over the PHI's seeing if we can promote them all to select
   // instructions.  While we are at it, keep track of the instructions
   // that need to be moved to the dominating block.
-  std::set<Instruction*> AggressiveInsts;
-
-  BasicBlock::iterator AfterPHIIt = BB->begin();
-  while (isa<PHINode>(AfterPHIIt)) {
-    PHINode *PN = cast<PHINode>(AfterPHIIt++);
-    if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) {
-      if (PN->getIncomingValue(0) != PN)
-        PN->replaceAllUsesWith(PN->getIncomingValue(0));
-      else
-        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
-    } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB,
-                                    &AggressiveInsts) ||
-               !DominatesMergePoint(PN->getIncomingValue(1), BB,
-                                    &AggressiveInsts)) {
-      return false;
+  SmallPtrSet<Instruction*, 4> AggressiveInsts;
+
+  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+    PHINode *PN = cast<PHINode>(II++);
+    if (Value *V = SimplifyInstruction(PN, TD)) {
+      PN->replaceAllUsesWith(V);
+      PN->eraseFromParent();
+      continue;
     }
+
+    if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) ||
+        !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts))
+      return false;
   }

+  // If we folded the first phi, PN dangles at this point.  Refresh it.  If
+  // we ran out of PHIs then we simplified them all.
+  PN = dyn_cast<PHINode>(BB->begin());
+  if (PN == 0) return true;
+
+  // Don't fold i1 branches on PHIs which contain binary operators.  These can
+  // often be turned into switches and other things. 
+  if (PN->getType()->isIntegerTy(1) &&
+      (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+       isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+       isa<BinaryOperator>(IfCond)))
+    return false;
+
   // If all PHI nodes are promotable, check to make sure that all
   // instructions in the predecessor blocks can be promoted as well.  If
   // not, we won't be able to get rid of the control flow, so it's not
   // worth promoting to select instructions.
-  BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0;
-  PN = cast<PHINode>(BB->begin());
-  BasicBlock *Pred = PN->getIncomingBlock(0);
-  if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
-    IfBlock1 = Pred;
-    DomBlock = *pred_begin(Pred);
-    for (BasicBlock::iterator I = Pred->begin();
-         !isa<TerminatorInst>(I); ++I)
+  BasicBlock *DomBlock = 0;
+  BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+  BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+  if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+    IfBlock1 = 0;
+  } else {
+    DomBlock = *pred_begin(IfBlock1);
+    for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
       if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
         // This is not an aggressive instruction that we can promote.
         // Because of this, we won't be able to get rid of the control
@@ -1221,12 +1251,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
         return false;
       }
   }

-  Pred = PN->getIncomingBlock(1);
-  if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
-    IfBlock2 = Pred;
-    DomBlock = *pred_begin(Pred);
-    for (BasicBlock::iterator I = Pred->begin();
-         !isa<TerminatorInst>(I); ++I)
+  if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+    IfBlock2 = 0;
+  } else {
+    DomBlock = *pred_begin(IfBlock2);
+    for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
       if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
         // This is not an aggressive instruction that we can promote.
         // Because of this, we won't be able to get rid of the control
@@ -1234,56 +1263,45 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
         return false;
       }
   }
+
+  DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
+               << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");

   // If we can still promote the PHI nodes after this gauntlet of tests,
   // do all of the PHI's now.
-
+  Instruction *InsertPt = DomBlock->getTerminator();
+
   // Move all 'aggressive' instructions, which are defined in the
   // conditional parts of the if's up to the dominating block.
-  if (IfBlock1) {
-    DomBlock->getInstList().splice(DomBlock->getTerminator(),
-                                   IfBlock1->getInstList(),
-                                   IfBlock1->begin(),
+  if (IfBlock1)
+    DomBlock->getInstList().splice(InsertPt,
+                                   IfBlock1->getInstList(), IfBlock1->begin(),
                                    IfBlock1->getTerminator());
-  }
-  if (IfBlock2) {
-    DomBlock->getInstList().splice(DomBlock->getTerminator(),
-                                   IfBlock2->getInstList(),
-                                   IfBlock2->begin(),
+  if (IfBlock2)
+    DomBlock->getInstList().splice(InsertPt,
+                                   IfBlock2->getInstList(), IfBlock2->begin(),
                                    IfBlock2->getTerminator());
-  }

   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     // Change the PHI node into a select instruction. 
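// The resulting flattening, sketched on hypothetical IR:
//   %x = phi i32 [ %a, %if.then ], [ %b, %if.else ]
// becomes, once both arms have been spliced into the dominating block,
//   %x = select i1 %cond, i32 %a, i32 %b
// and the dominating block then branches unconditionally to the merge
// block, removing the diamond entirely.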
- Value *TrueVal = - PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); - Value *FalseVal = - PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); + Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); + Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", AfterPHIIt); + Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", InsertPt); PN->replaceAllUsesWith(NV); NV->takeName(PN); - - BB->getInstList().erase(PN); + PN->eraseFromParent(); } + + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement + // has been flattened. Change DomBlock to jump directly to our new block to + // avoid other simplifycfg's kicking in on the diamond. + TerminatorInst *OldTI = DomBlock->getTerminator(); + BranchInst::Create(BB, OldTI); + OldTI->eraseFromParent(); return true; } -/// isTerminatorFirstRelevantInsn - Return true if Term is very first -/// instruction ignoring Phi nodes and dbg intrinsics. -static bool isTerminatorFirstRelevantInsn(BasicBlock *BB, Instruction *Term) { - BasicBlock::iterator BBI = Term; - while (BBI != BB->begin()) { - --BBI; - if (!isa<DbgInfoIntrinsic>(BBI)) - break; - } - - if (isa<PHINode>(BBI) || &*BBI == Term || isa<DbgInfoIntrinsic>(BBI)) - return true; - return false; -} - /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. @@ -1297,9 +1315,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. - if (!isTerminatorFirstRelevantInsn(TrueSucc, TrueRet)) + if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator()) return false; - if (!isTerminatorFirstRelevantInsn(FalseSucc, FalseRet)) + if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator()) return false; // Okay, we found a branch that is going to two return nodes. If @@ -1386,7 +1404,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // must be at the front of the block. BasicBlock::iterator FrontIt = BB->front(); // Ignore dbg intrinsics. - while(isa<DbgInfoIntrinsic>(FrontIt)) + while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; // Allow a single instruction to be hoisted in addition to the compare @@ -1470,7 +1488,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - if (Instruction* I = dyn_cast<Instruction>(Pair.first)) { + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); @@ -1498,9 +1516,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // If we need to invert the condition in the pred block to match, do so now. 
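// A sketch of the inversion performed below (hypothetical IR): for a
// single-use compare the predicate itself is flipped, e.g.
//   %c = icmp eq i32 %x, 0    ; br i1 %c, label %a, label %b
// becomes
//   %c = icmp ne i32 %x, 0    ; br i1 %c, label %b, label %a
// and only otherwise is a separate "not" instruction materialized.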
if (InvertPredCond) { - Value *NewCond = - BinaryOperator::CreateNot(PBI->getCondition(), + Value *NewCond = PBI->getCondition(); + + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = BinaryOperator::CreateNot(NewCond, PBI->getCondition()->getName()+".not", PBI); + } + PBI->setCondition(NewCond); BasicBlock *OldTrue = PBI->getSuccessor(0); BasicBlock *OldFalse = PBI->getSuccessor(1); @@ -1686,17 +1711,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. - PHINode *PN; - for (BasicBlock::iterator II = OtherDest->begin(); - (PN = dyn_cast<PHINode>(II)); ++II) { - Value *V = PN->getIncomingValueForBlock(BB); - PN->addIncoming(V, PBI->getParent()); - } + AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make // them agree. + PHINode *PN; for (BasicBlock::iterator II = CommonDest->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { Value *BIV = PN->getIncomingValueForBlock(BB); @@ -1718,481 +1739,789 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { return true; } -bool SimplifyCFGOpt::run(BasicBlock *BB) { - bool Changed = false; - Function *M = BB->getParent(); - - assert(BB && BB->getParent() && "Block not embedded in function!"); - assert(BB->getTerminator() && "Degenerate basic block encountered!"); +// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a +// branch to TrueBB if Cond is true or to FalseBB if Cond is false. +// Takes care of updating the successors and removing the old terminator. +// Also makes sure not to introduce new successors by assuming that edges to +// non-successor TrueBBs and FalseBBs aren't reachable. +static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, + BasicBlock *TrueBB, BasicBlock *FalseBB){ + // Remove any superfluous successor edges from the CFG. + // First, figure out which successors to preserve. + // If TrueBB and FalseBB are equal, only try to preserve one copy of that + // successor. + BasicBlock *KeepEdge1 = TrueBB; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0; + + // Then remove the rest. + for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { + BasicBlock *Succ = OldTerm->getSuccessor(I); + // Make sure only to keep exactly one copy of each edge. + if (Succ == KeepEdge1) + KeepEdge1 = 0; + else if (Succ == KeepEdge2) + KeepEdge2 = 0; + else + Succ->removePredecessor(OldTerm->getParent()); + } - // Remove basic blocks that have no predecessors (except the entry block)... - // or that just have themself as a predecessor. These are unreachable. - if ((pred_begin(BB) == pred_end(BB) && - &BB->getParent()->getEntryBlock() != BB) || - BB->getSinglePredecessor() == BB) { - DEBUG(dbgs() << "Removing BB: \n" << *BB); - DeleteDeadBlock(BB); - return true; + // Insert an appropriate new terminator. + if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) { + if (TrueBB == FalseBB) + // We were only looking for one successor, and it was present. + // Create an unconditional branch to it. + BranchInst::Create(TrueBB, OldTerm); + else + // We found both of the successors we were looking for. 
+ // Create a conditional branch sharing the condition of the select. + BranchInst::Create(TrueBB, FalseBB, Cond, OldTerm); + } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { + // Neither of the selected blocks were successors, so this + // terminator must be unreachable. + new UnreachableInst(OldTerm->getContext(), OldTerm); + } else { + // One of the selected values was a successor, but the other wasn't. + // Insert an unconditional branch to the one that was found; + // the edge to the one that wasn't must be unreachable. + if (KeepEdge1 == 0) + // Only TrueBB was found. + BranchInst::Create(TrueBB, OldTerm); + else + // Only FalseBB was found. + BranchInst::Create(FalseBB, OldTerm); } - // Check to see if we can constant propagate this terminator instruction - // away... - Changed |= ConstantFoldTerminator(BB); + EraseTerminatorInstAndDCECond(OldTerm); + return true; +} - // Check for and eliminate duplicate PHI nodes in this block. - Changed |= EliminateDuplicatePHINodes(BB); +// SimplifyIndirectBrOnSelect - Replaces +// (indirectbr (select cond, blockaddress(@fn, BlockA), +// blockaddress(@fn, BlockB))) +// with +// (br cond, BlockA, BlockB). +static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { + // Check that both operands of the select are block addresses. + BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue()); + BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue()); + if (!TBA || !FBA) + return false; - // If there is a trivial two-entry PHI node in this basic block, and we can - // eliminate it, do so now. - if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) - if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN); + // Extract the actual blocks. + BasicBlock *TrueBB = TBA->getBasicBlock(); + BasicBlock *FalseBB = FBA->getBasicBlock(); - // If this is a returning block with only PHI nodes in it, fold the return - // instruction into any unconditional branch predecessors. - // - // If any predecessor is a conditional branch that just selects among - // different return values, fold the replace the branch/return with a select - // and return. - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { - if (isTerminatorFirstRelevantInsn(BB, BB->getTerminator())) { - // Find predecessors that end with branches. - SmallVector<BasicBlock*, 8> UncondBranchPreds; - SmallVector<BranchInst*, 8> CondBranchPreds; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - TerminatorInst *PTI = P->getTerminator(); - if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { - if (BI->isUnconditional()) - UncondBranchPreds.push_back(P); - else - CondBranchPreds.push_back(BI); - } - } + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB); +} - // If we found some, do the transformation! - if (!UncondBranchPreds.empty()) { - while (!UncondBranchPreds.empty()) { - BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - DEBUG(dbgs() << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred); - Instruction *UncondBranch = Pred->getTerminator(); - // Clone the return and add it to the end of the predecessor. - Instruction *NewRet = RI->clone(); - Pred->getInstList().push_back(NewRet); - - // If the return instruction returns a value, and if the value was a - // PHI node in "BB", propagate the right value into the return. 
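// A sketch of SimplifyIndirectBrOnSelect above (hypothetical IR):
//   indirectbr i8* select(i1 %c, i8* blockaddress(@f, %A),
//                                i8* blockaddress(@f, %B)), [...]
// becomes
//   br i1 %c, label %A, label %B
// with SimplifyTerminatorOnSelect pruning any successor edges that are no
// longer reachable.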
- for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) - if (PHINode *PN = dyn_cast<PHINode>(*i)) - if (PN->getParent() == BB) - *i = PN->getIncomingValueForBlock(Pred); - - // Update any PHI nodes in the returning block to realize that we no - // longer branch to them. - BB->removePredecessor(Pred); - Pred->getInstList().erase(UncondBranch); - } +/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp +/// instruction (a seteq/setne with a constant) as the only instruction in a +/// block that ends with an uncond branch. We are looking for a very specific +/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In +/// this case, we merge the first two "or's of icmp" into a switch, but then the +/// default value goes to an uncond block with a seteq in it, we get something +/// like: +/// +/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] +/// DEFAULT: +/// %tmp = icmp eq i8 %A, 92 +/// br label %end +/// end: +/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] +/// +/// We prefer to split the edge to 'end' so that there is a true/false entry to +/// the PHI, merging the third icmp into the switch. +static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, + const TargetData *TD) { + BasicBlock *BB = ICI->getParent(); + // If the block has any PHIs in it or the icmp has multiple uses, it is too + // complex. + if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false; + + Value *V = ICI->getOperand(0); + ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); + + // The pattern we're looking for is where our only predecessor is a switch on + // 'V' and this block is the default case for the switch. In this case we can + // fold the compared value into the switch to simplify things. + BasicBlock *Pred = BB->getSinglePredecessor(); + if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; + + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); + if (SI->getCondition() != V) + return false; + + // If BB is reachable on a non-default case, then we simply know the value of + // V in this block. Substitute it and constant fold the icmp instruction + // away. + if (SI->getDefaultDest() != BB) { + ConstantInt *VVal = SI->findCaseDest(BB); + assert(VVal && "Should have a unique destination value"); + ICI->setOperand(0, VVal); + + if (Value *V = SimplifyInstruction(ICI, TD)) { + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + } + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB) | true; + } + + // Ok, the block is reachable from the default dest. If the constant we're + // comparing exists in one of the other edges, then we can constant fold ICI + // and zap it. + if (SI->findCaseValue(Cst) != 0) { + Value *V; + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + V = ConstantInt::getFalse(BB->getContext()); + else + V = ConstantInt::getTrue(BB->getContext()); + + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB) | true; + } + + // The use of the icmp has to be in the 'end' block, by the only PHI node in + // the block. + BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); + PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back()); + if (PHIUse == 0 || PHIUse != &SuccBlock->front() || + isa<PHINode>(++BasicBlock::iterator(PHIUse))) + return false; - // If we eliminated all predecessors of the block, delete the block now. 
- if (pred_begin(BB) == pred_end(BB)) - // We know there are no successors, so just nuke the block. - M->getBasicBlockList().erase(BB); + // If the icmp is a SETEQ, then the default dest gets false, the new edge gets + // true in the PHI. + Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); + Constant *NewCst = ConstantInt::getFalse(BB->getContext()); - return true; - } + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(DefaultCst, NewCst); - // Check out all of the conditional branches going to this return - // instruction. If any of them just select between returns, change the - // branch itself into a select/return pair. - while (!CondBranchPreds.empty()) { - BranchInst *BI = CondBranchPreds.pop_back_val(); - - // Check to see if the non-BB successor is also a return block. - if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && - isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && - SimplifyCondBranchToTwoReturns(BI)) - return true; - } - } - } else if (isa<UnwindInst>(BB->begin())) { - // Check to see if the first instruction in this block is just an unwind. - // If so, replace any invoke instructions which use this as an exception - // destination with call instructions. - // - SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); - while (!Preds.empty()) { - BasicBlock *Pred = Preds.back(); - if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator())) - if (II->getUnwindDest() == BB) { - // Insert a new branch instruction before the invoke, because this - // is now a fall through. - BranchInst *BI = BranchInst::Create(II->getNormalDest(), II); - Pred->getInstList().remove(II); // Take out of symbol table - - // Insert the call now. - SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3); - CallInst *CI = CallInst::Create(II->getCalledValue(), - Args.begin(), Args.end(), - II->getName(), BI); - CI->setCallingConv(II->getCallingConv()); - CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the Call now does instead. - II->replaceAllUsesWith(CI); - delete II; - Changed = true; - } + // Replace ICI (which is used by the PHI for the default value) with true or + // false depending on if it is EQ or NE. + ICI->replaceAllUsesWith(DefaultCst); + ICI->eraseFromParent(); - Preds.pop_back(); - } + // Okay, the switch goes to this block on a default value. Add an edge from + // the switch to the merge point on the compared value. + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", + BB->getParent(), BB); + SI->addCase(Cst, NewBB); + + // NewBB branches to the phi block, add the uncond branch and the phi entry. + BranchInst::Create(SuccBlock, NewBB); + PHIUse->addIncoming(NewCst, NewBB); + return true; +} - // If this block is now dead, remove it. - if (pred_begin(BB) == pred_end(BB)) { - // We know there are no successors, so just nuke the block. - M->getBasicBlockList().erase(BB); - return true; - } +/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. +/// Check to see if it is branching on an or/and chain of icmp instructions, and +/// fold it into a switch instruction if so. +static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD) { + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); + if (Cond == 0) return false; + + + // Change br (X == 0 | X == 1), T, F into a switch instruction. + // If this is a bunch of seteq's or'd together, or if it's a bunch of + // 'setne's and'ed together, collect them. 
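// A sketch of the overall transformation (hypothetical IR):
//   %c1 = icmp eq i32 %A, 1
//   %c2 = icmp eq i32 %A, 2
//   %o  = or i1 %c1, %c2
//   br i1 %o, label %T, label %F
// becomes
//   switch i32 %A, label %F [ i32 1, label %T
//                             i32 2, label %T ]
// The and-of-setne form is handled identically with the two destinations
// swapped (TrueWhenEqual == false).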
+  Value *CompVal = 0;
+  std::vector<ConstantInt*> Values;
+  bool TrueWhenEqual = true;
+  Value *ExtraCase = 0;
+  unsigned UsedICmps = 0;
+
+  if (Cond->getOpcode() == Instruction::Or) {
+    CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+                                     UsedICmps);
+  } else if (Cond->getOpcode() == Instruction::And) {
+    CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+                                     UsedICmps);
+    TrueWhenEqual = false;
+  }
+
+  // If we didn't find a value compared against multiple constants, fail.
+  if (CompVal == 0) return false;

-  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
-    if (isValueEqualityComparison(SI)) {
-      // If we only have one predecessor, and if it is a branch on this value,
-      // see if that predecessor totally determines the outcome of this switch.
-      if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
-        if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
-          return SimplifyCFG(BB) || 1;
-
-      // If the block only contains the switch, see if we can fold the block
-      // away into any preds.
-      BasicBlock::iterator BBI = BB->begin();
-      // Ignore dbg intrinsics.
-      while (isa<DbgInfoIntrinsic>(BBI))
-        ++BBI;
-      if (SI == &*BBI)
-        if (FoldValueComparisonIntoPredecessors(SI))
-          return SimplifyCFG(BB) || 1;
-    }
-  } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
-    if (BI->isUnconditional()) {
-      BasicBlock::iterator BBI = BB->getFirstNonPHI();
+  // Avoid turning single icmps into a switch.
+  if (UsedICmps <= 1)
+    return false;

-      // Ignore dbg intrinsics.
-      while (isa<DbgInfoIntrinsic>(BBI))
-        ++BBI;
-      if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
-        if (BB != &BB->getParent()->getEntryBlock())
-          if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
-            return true;
+  // There might be duplicate constants in the list, which the switch
+  // instruction can't handle, remove them now.
+  array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+  Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+  // If Extra was used, we require at least two switch values to do the
+  // transformation.  A switch with one value is just a cond branch.
+  if (ExtraCase && Values.size() < 2) return false;
+
+  // Figure out which block is which destination.
+  BasicBlock *DefaultBB = BI->getSuccessor(1);
+  BasicBlock *EdgeBB    = BI->getSuccessor(0);
+  if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+  BasicBlock *BB = BI->getParent();
+
+  DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+               << " cases into SWITCH.  BB is:\n" << *BB);
+
+  // If there are any extra values that couldn't be folded into the switch
+  // then we evaluate them with an explicit branch first.  Split the block
+  // right before the condbr to handle it.
+  if (ExtraCase) {
+    BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+    // Remove the uncond branch added to the old block.
+    TerminatorInst *OldTI = BB->getTerminator();
+
+    if (TrueWhenEqual)
+      BranchInst::Create(EdgeBB, NewBB, ExtraCase, OldTI);
+    else
+      BranchInst::Create(NewBB, EdgeBB, ExtraCase, OldTI);

-    } else {  // Conditional branch
-      if (isValueEqualityComparison(BI)) {
-        // If we only have one predecessor, and if it is a branch on this value,
-        // see if that predecessor totally determines the outcome of this
-        // switch. 
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred)) - return SimplifyCFG(BB) | true; - - // This block must be empty, except for the setcond inst, if it exists. - // Ignore dbg intrinsics. - BasicBlock::iterator I = BB->begin(); - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(I)) - ++I; - if (&*I == BI) { - if (FoldValueComparisonIntoPredecessors(BI)) - return SimplifyCFG(BB) | true; - } else if (&*I == cast<Instruction>(BI->getCondition())){ - ++I; - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(I)) - ++I; - if(&*I == BI) { - if (FoldValueComparisonIntoPredecessors(BI)) - return SimplifyCFG(BB) | true; - } - } - } + OldTI->eraseFromParent(); + + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them + // for the edge we just added. + AddPredecessorToBlock(EdgeBB, BB, NewBB); + + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase + << "\nEXTRABB = " << *BB); + BB = NewBB; + } + + // Convert pointer to int before we switch. + if (CompVal->getType()->isPointerTy()) { + assert(TD && "Cannot switch on pointer without TargetData"); + CompVal = new PtrToIntInst(CompVal, + TD->getIntPtrType(CompVal->getContext()), + "magicptr", BI); + } + + // Create the new switch instruction now. + SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, Values.size(), BI); + + // Add all of the 'cases' to the switch instruction. + for (unsigned i = 0, e = Values.size(); i != e; ++i) + New->addCase(Values[i], EdgeBB); + + // We added edges from PI to the EdgeBB. As such, if there were any + // PHI nodes in EdgeBB, they need entries to be added corresponding to + // the number of edges added. + for (BasicBlock::iterator BBI = EdgeBB->begin(); + isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + Value *InVal = PN->getIncomingValueForBlock(BB); + for (unsigned i = 0, e = Values.size()-1; i != e; ++i) + PN->addIncoming(InVal, BB); + } + + // Erase the old branch instruction. + EraseTerminatorInstAndDCECond(BI); + + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); + return true; +} - // If this is a branch on a phi node in the current block, thread control - // through this block if any PHI node entries are constants. - if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) - if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI)) - return SimplifyCFG(BB) | true; - - // If this basic block is ONLY a setcc and a branch, and if a predecessor - // branches to us and one of our successors, fold the setcc into the - // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB) | true; +bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) { + BasicBlock *BB = RI->getParent(); + if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; + + // Find predecessors that end with branches. + SmallVector<BasicBlock*, 8> UncondBranchPreds; + SmallVector<BranchInst*, 8> CondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + TerminatorInst *PTI = P->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { + if (BI->isUnconditional()) + UncondBranchPreds.push_back(P); + else + CondBranchPreds.push_back(BI); + } + } + + // If we found some, do the transformation! 
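// A sketch of the two transformations below (hypothetical IR): an
// unconditional-branch predecessor of
//   %r = phi i32 [ 1, %p1 ], [ 2, %p2 ]
//   ret i32 %r
// gets its own "ret i32 1" (respectively "ret i32 2") cloned into it via
// FoldReturnIntoUncondBranch, while a conditional branch that merely picks
// between two returns is collapsed into a select feeding a single ret.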
+ if (!UncondBranchPreds.empty() && DupRet) { + while (!UncondBranchPreds.empty()) { + BasicBlock *Pred = UncondBranchPreds.pop_back_val(); + DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred); + } + + // If we eliminated all predecessors of the block, delete the block now. + if (pred_begin(BB) == pred_end(BB)) + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + + return true; + } + + // Check out all of the conditional branches going to this return + // instruction. If any of them just select between returns, change the + // branch itself into a select/return pair. + while (!CondBranchPreds.empty()) { + BranchInst *BI = CondBranchPreds.pop_back_val(); + + // Check to see if the non-BB successor is also a return block. + if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && + isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && + SimplifyCondBranchToTwoReturns(BI)) + return true; + } + return false; +} +bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI) { + // Check to see if the first instruction in this block is just an unwind. + // If so, replace any invoke instructions which use this as an exception + // destination with call instructions. + BasicBlock *BB = UI->getParent(); + if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - // Scan predecessor blocks for conditional branches. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) - if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB) | true; - } - } else if (isa<UnreachableInst>(BB->getTerminator())) { - // If there are any instructions immediately before the unreachable that can - // be removed, do so. - Instruction *Unreachable = BB->getTerminator(); - while (Unreachable != BB->begin()) { - BasicBlock::iterator BBI = Unreachable; - --BBI; - // Do not delete instructions that can have side effects, like calls - // (which may never return) and volatile loads and stores. - if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; - - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) - if (SI->isVolatile()) - break; - - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) - if (LI->isVolatile()) - break; - - // Delete this instruction - BB->getInstList().erase(BBI); + bool Changed = false; + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.back(); + InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()); + if (II && II->getUnwindDest() == BB) { + // Insert a new branch instruction before the invoke, because this + // is now a fall through. + BranchInst *BI = BranchInst::Create(II->getNormalDest(), II); + Pred->getInstList().remove(II); // Take out of symbol table + + // Insert the call now. + SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3); + CallInst *CI = CallInst::Create(II->getCalledValue(), + Args.begin(), Args.end(), + II->getName(), BI); + CI->setCallingConv(II->getCallingConv()); + CI->setAttributes(II->getAttributes()); + // If the invoke produced a value, the Call now does instead. + II->replaceAllUsesWith(CI); + delete II; Changed = true; } + + Preds.pop_back(); + } + + // If this block is now dead (and isn't the entry block), remove it. 
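// A sketch of the conversion above (hypothetical IR, assuming the old-style
// unwind instruction of this era): when the unwind destination does nothing
// but unwind,
//   invoke void @g() to label %normal unwind label %lpad
// becomes
//   call void @g()
//   br label %normal
// since the exceptional edge can never do anything useful.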
+ if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) { + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + return true; + } + + return Changed; +} - // If the unreachable instruction is the first in the block, take a gander - // at all of the predecessors of this instruction, and simplify them. - if (&BB->front() == Unreachable) { - SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - TerminatorInst *TI = Preds[i]->getTerminator(); - - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - if (BI->isUnconditional()) { - if (BI->getSuccessor(0) == BB) { - new UnreachableInst(TI->getContext(), TI); - TI->eraseFromParent(); - Changed = true; - } - } else { - if (BI->getSuccessor(0) == BB) { - BranchInst::Create(BI->getSuccessor(1), BI); - EraseTerminatorInstAndDCECond(BI); - } else if (BI->getSuccessor(1) == BB) { - BranchInst::Create(BI->getSuccessor(0), BI); - EraseTerminatorInstAndDCECond(BI); - Changed = true; - } +bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { + BasicBlock *BB = UI->getParent(); + + bool Changed = false; + + // If there are any instructions immediately before the unreachable that can + // be removed, do so. + while (UI != BB->begin()) { + BasicBlock::iterator BBI = UI; + --BBI; + // Do not delete instructions that can have side effects, like calls + // (which may never return) and volatile loads and stores. + if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; + + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) + if (SI->isVolatile()) + break; + + if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) + if (LI->isVolatile()) + break; + + // Delete this instruction + BBI->eraseFromParent(); + Changed = true; + } + + // If the unreachable instruction is the first in the block, take a gander + // at all of the predecessors of this instruction, and simplify them. + if (&BB->front() != UI) return Changed; + + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + TerminatorInst *TI = Preds[i]->getTerminator(); + + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isUnconditional()) { + if (BI->getSuccessor(0) == BB) { + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; + } + } else { + if (BI->getSuccessor(0) == BB) { + BranchInst::Create(BI->getSuccessor(1), BI); + EraseTerminatorInstAndDCECond(BI); + } else if (BI->getSuccessor(1) == BB) { + BranchInst::Create(BI->getSuccessor(0), BI); + EraseTerminatorInstAndDCECond(BI); + Changed = true; + } + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + if (SI->getSuccessor(i) == BB) { + BB->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; --e; + Changed = true; + } + // If the default value is unreachable, figure out the most popular + // destination and make it the default. + if (SI->getSuccessor(0) == BB) { + std::map<BasicBlock*, unsigned> Popularity; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + Popularity[SI->getSuccessor(i)]++; + + // Find the most popular block. 
+ unsigned MaxPop = 0; + BasicBlock *MaxBlock = 0; + for (std::map<BasicBlock*, unsigned>::iterator + I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { + if (I->second > MaxPop) { + MaxPop = I->second; + MaxBlock = I->first; } - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + } + if (MaxBlock) { + // Make this the new default, allowing us to delete any explicit + // edges to it. + SI->setSuccessor(0, MaxBlock); + Changed = true; + + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from + // it. + if (isa<PHINode>(MaxBlock->begin())) + for (unsigned i = 0; i != MaxPop-1; ++i) + MaxBlock->removePredecessor(SI->getParent()); + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - if (SI->getSuccessor(i) == BB) { - BB->removePredecessor(SI->getParent()); + if (SI->getSuccessor(i) == MaxBlock) { SI->removeCase(i); --i; --e; - Changed = true; - } - // If the default value is unreachable, figure out the most popular - // destination and make it the default. - if (SI->getSuccessor(0) == BB) { - std::map<BasicBlock*, unsigned> Popularity; - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - Popularity[SI->getSuccessor(i)]++; - - // Find the most popular block. - unsigned MaxPop = 0; - BasicBlock *MaxBlock = 0; - for (std::map<BasicBlock*, unsigned>::iterator - I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second > MaxPop) { - MaxPop = I->second; - MaxBlock = I->first; - } - } - if (MaxBlock) { - // Make this the new default, allowing us to delete any explicit - // edges to it. - SI->setSuccessor(0, MaxBlock); - Changed = true; - - // If MaxBlock has phinodes in it, remove MaxPop-1 entries from - // it. - if (isa<PHINode>(MaxBlock->begin())) - for (unsigned i = 0; i != MaxPop-1; ++i) - MaxBlock->removePredecessor(SI->getParent()); - - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - if (SI->getSuccessor(i) == MaxBlock) { - SI->removeCase(i); - --i; --e; - } } - } - } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) { - if (II->getUnwindDest() == BB) { - // Convert the invoke to a call instruction. This would be a good - // place to note that the call does not throw though. - BranchInst *BI = BranchInst::Create(II->getNormalDest(), II); - II->removeFromParent(); // Take out of symbol table - - // Insert the call now... - SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); - CallInst *CI = CallInst::Create(II->getCalledValue(), - Args.begin(), Args.end(), - II->getName(), BI); - CI->setCallingConv(II->getCallingConv()); - CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the call does now instead. - II->replaceAllUsesWith(CI); - delete II; - Changed = true; - } } } - - // If this block is now dead, remove it. - if (pred_begin(BB) == pred_end(BB) && - BB != &BB->getParent()->getEntryBlock()) { - // We know there are no successors, so just nuke the block. - M->getBasicBlockList().erase(BB); - return true; - } - } - } else if (IndirectBrInst *IBI = - dyn_cast<IndirectBrInst>(BB->getTerminator())) { - // Eliminate redundant destinations. - SmallPtrSet<Value *, 8> Succs; - for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - BasicBlock *Dest = IBI->getDestination(i); - if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) { - Dest->removePredecessor(BB); - IBI->removeDestination(i); - --i; --e; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) { + if (II->getUnwindDest() == BB) { + // Convert the invoke to a call instruction. 
+        // This would be a good place to note that the call does not throw
+        // though.
+        BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+        II->removeFromParent();   // Take out of symbol table
+
+        // Insert the call now...
+        SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+        CallInst *CI = CallInst::Create(II->getCalledValue(),
+                                        Args.begin(), Args.end(),
+                                        II->getName(), BI);
+        CI->setCallingConv(II->getCallingConv());
+        CI->setAttributes(II->getAttributes());
+        // If the invoke produced a value, the call does now instead.
+        II->replaceAllUsesWith(CI);
+        delete II;
        Changed = true;
      }
    }
  }
+
+  // If this block is now dead, remove it.
+  if (pred_begin(BB) == pred_end(BB) &&
+      BB != &BB->getParent()->getEntryBlock()) {
+    // We know there are no successors, so just nuke the block.
+    BB->eraseFromParent();
+    return true;
+  }
+
+  return Changed;
+}
+
+/// TurnSwitchRangeIntoICmp - Turns a switch that contains only an
+/// integer range comparison into a sub, an icmp and a branch.
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI) {
+  assert(SI->getNumCases() > 2 && "Degenerate switch?");
+
+  // Make sure all cases point to the same destination and gather the values.
+  SmallVector<ConstantInt *, 16> Cases;
+  Cases.push_back(SI->getCaseValue(1));
+  for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
+    if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+      return false;
+    Cases.push_back(SI->getCaseValue(I));
+  }
+  assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+
+  // Sort the case values, then check if they form a range we can transform.
+  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+  for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
+    if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
+      return false;
+  }
+
+  Constant *Offset = ConstantExpr::getNeg(Cases.back());
+  Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+
+  Value *Sub = SI->getCondition();
+  if (!Offset->isNullValue())
+    Sub = BinaryOperator::CreateAdd(Sub, Offset, Sub->getName()+".off", SI);
+  Value *Cmp = new ICmpInst(SI, ICmpInst::ICMP_ULT, Sub, NumCases, "switch");
+  BranchInst::Create(SI->getSuccessor(1), SI->getDefaultDest(), Cmp, SI);
+
+  // Prune obsolete incoming values off the successor's PHI nodes.
+  for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+       isa<PHINode>(BBI); ++BBI) {
+    for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+  }
+  SI->eraseFromParent();
+
+  return true;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) {
+  // If this switch is too complex to want to look at, ignore it.
+  if (!isValueEqualityComparison(SI))
+    return false;
+
+  BasicBlock *BB = SI->getParent();
+
+  // If we only have one predecessor, and if it is a branch on this value,
+  // see if that predecessor totally determines the outcome of this switch. 
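// A sketch of TurnSwitchRangeIntoICmp above (hypothetical IR):
//   switch i32 %x, label %def [ i32 3, label %body
//                               i32 4, label %body
//                               i32 5, label %body ]
// becomes
//   %x.off  = add i32 %x, -3
//   %switch = icmp ult i32 %x.off, 3
//   br i1 %switch, label %body, label %def
// The cases are sorted descending by ConstantIntSortPredicate, so a
// contiguous range shows up as each value being its successor plus one.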
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred)) + return SimplifyCFG(BB) | true; + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI)) + return SimplifyCFG(BB) | true; + + // Try to transform the switch into an icmp and a branch. + if (TurnSwitchRangeIntoICmp(SI)) + return SimplifyCFG(BB) | true; + + return false; +} + +bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + BasicBlock *Dest = IBI->getDestination(i); + if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) { + Dest->removePredecessor(BB); + IBI->removeDestination(i); + --i; --e; Changed = true; } + } + + if (IBI->getNumDestinations() == 0) { + // If the indirectbr has no successors, change it to unreachable. + new UnreachableInst(IBI->getContext(), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; + } + + if (IBI->getNumDestinations() == 1) { + // If the indirectbr has one successor, change it to a direct branch. + BranchInst::Create(IBI->getDestination(0), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; } + + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { + if (SimplifyIndirectBrOnSelect(IBI, SI)) + return SimplifyCFG(BB) | true; + } + return Changed; +} - // Merge basic blocks into their predecessor if there is only one distinct - // pred, and if there is only one distinct successor of the predecessor, and - // if there are no PHI nodes. - // - if (MergeBlockIntoPredecessor(BB)) +bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI) { + BasicBlock *BB = BI->getParent(); + + // If the Terminator is the only non-phi instruction, simplify the block. + BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(); + if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; + + // If the only instruction in the block is a seteq/setne comparison + // against a constant, try to simplify the block. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) + if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; + if (I->isTerminator() && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD)) + return true; + } + + return false; +} - // Otherwise, if this block only has a single predecessor, and if that block - // is a conditional branch, see if we can hoist any code from this block up - // into our predecessor. - pred_iterator PI(pred_begin(BB)), PE(pred_end(BB)); - BasicBlock *OnlyPred = 0; - for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same - if (!OnlyPred) - OnlyPred = *PI; - else if (*PI != OnlyPred) { - OnlyPred = 0; // There are multiple different predecessors... - break; + +bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI) { + BasicBlock *BB = BI->getParent(); + + // Conditional branch + if (isValueEqualityComparison(BI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this + // switch. 
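
The redundant-destination loop in the new SimplifyIndirectBr is a seen-set filter; the --i; --e dance re-examines the slot that removeDestination just shifted down. The same shape on a plain vector of hypothetical block names, with std::set standing in for SmallPtrSet:

    #include <cassert>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Destinations of a hypothetical indirectbr, with repeats.
      std::vector<std::string> dests = {"bb1", "bb2", "bb1", "bb3", "bb2"};
      std::set<std::string> seen;
      for (size_t i = 0, e = dests.size(); i != e; ++i) {
        if (!seen.insert(dests[i]).second) {   // already a successor?
          dests.erase(dests.begin() + i);      // removeDestination(i)
          --i; --e;                            // re-examine the shifted slot
        }
      }
      assert((dests == std::vector<std::string>{"bb1", "bb2", "bb3"}));
      return 0;
    }
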
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred)) + return SimplifyCFG(BB) | true; + + // This block must be empty, except for the setcond inst, if it exists. + // Ignore dbg intrinsics. + BasicBlock::iterator I = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI)) + return SimplifyCFG(BB) | true; + } else if (&*I == cast<Instruction>(BI->getCondition())){ + ++I; + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI && FoldValueComparisonIntoPredecessors(BI)) + return SimplifyCFG(BB) | true; } } - if (OnlyPred) - if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator())) - if (BI->isConditional()) { - // Get the other block. - BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB); - PI = pred_begin(OtherBB); - ++PI; - - if (PI == pred_end(OtherBB)) { - // We have a conditional branch to two blocks that are only reachable - // from the condbr. We know that the condbr dominates the two blocks, - // so see if there is any identical code in the "then" and "else" - // blocks. If so, we can hoist it up to the branching block. - Changed |= HoistThenElseCodeToIf(BI); - } else { - BasicBlock* OnlySucc = NULL; - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); - SI != SE; ++SI) { - if (!OnlySucc) - OnlySucc = *SI; - else if (*SI != OnlySucc) { - OnlySucc = 0; // There are multiple distinct successors! - break; - } - } + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. + if (SimplifyBranchOnICmpChain(BI, TD)) + return true; + + // We have a conditional branch to two blocks that are only reachable + // from BI. We know that the condbr dominates the two blocks, so see if + // there is any identical code in the "then" and "else" blocks. If so, we + // can hoist it up to the branching block. + if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { + if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + if (HoistThenElseCodeToIf(BI)) + return SimplifyCFG(BB) | true; + } else { + // If Successor #1 has multiple preds, we may be able to conditionally + // execute Successor #0 if it branches to successor #1. + TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); + if (Succ0TI->getNumSuccessors() == 1 && + Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) + return SimplifyCFG(BB) | true; + } + } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + // If Successor #0 has multiple preds, we may be able to conditionally + // execute Successor #1 if it branches to successor #0. + TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); + if (Succ1TI->getNumSuccessors() == 1 && + Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) + return SimplifyCFG(BB) | true; + } + + // If this is a branch on a phi node in the current block, thread control + // through this block if any PHI node entries are constants. + if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) + if (PN->getParent() == BI->getParent()) + if (FoldCondBranchOnPHI(BI, TD)) + return SimplifyCFG(BB) | true; + + // If this basic block is ONLY a setcc and a branch, and if a predecessor + // branches to us and one of our successors, fold the setcc into the + // predecessor and use logical operations to pick the right destination. 
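
SimplifyBranchOnICmpChain, invoked above, generalizes the comment's example: a branch on a disjunction of equality tests against constants selects successors exactly like a switch over those constants. The equivalence in miniature, with the comment's constants 0 and 1 and arbitrary stand-in destinations:

    #include <cassert>

    // "br (X == 0 | X == 1), T, F" and the switch built from the collected
    // constants pick the same successor for every X.
    static char branchForm(int x) { return (x == 0 || x == 1) ? 'T' : 'F'; }
    static char switchForm(int x) {
      switch (x) { case 0: case 1: return 'T'; default: return 'F'; }
    }

    int main() {
      for (int x = -5; x <= 5; ++x)
        assert(branchForm(x) == switchForm(x));
      return 0;
    }
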
+ if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB) | true; + + // Scan predecessor blocks for conditional branches. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (SimplifyCondBranchToCondBranch(PBI, BI)) + return SimplifyCFG(BB) | true; - if (OnlySucc == OtherBB) { - // If BB's only successor is the other successor of the predecessor, - // i.e. a triangle, see if we can hoist any code from this block up - // to the "if" block. - Changed |= SpeculativelyExecuteBB(BI, BB); - } - } - } + return false; +} - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator())) - // Change br (X == 0 | X == 1), T, F into a switch instruction. - if (BI->isConditional() && isa<Instruction>(BI->getCondition())) { - Instruction *Cond = cast<Instruction>(BI->getCondition()); - // If this is a bunch of seteq's or'd together, or if it's a bunch of - // 'setne's and'ed together, collect them. - Value *CompVal = 0; - std::vector<ConstantInt*> Values; - bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values); - if (CompVal) { - // There might be duplicate constants in the list, which the switch - // instruction can't handle, remove them now. - std::sort(Values.begin(), Values.end(), ConstantIntOrdering()); - Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - - // Figure out which block is which destination. - BasicBlock *DefaultBB = BI->getSuccessor(1); - BasicBlock *EdgeBB = BI->getSuccessor(0); - if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - - // Convert pointer to int before we switch. - if (CompVal->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without TargetData"); - CompVal = new PtrToIntInst(CompVal, - TD->getIntPtrType(CompVal->getContext()), - "magicptr", BI); - } +bool SimplifyCFGOpt::run(BasicBlock *BB) { + bool Changed = false; - // Create the new switch instruction now. - SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, - Values.size(), BI); - - // Add all of the 'cases' to the switch instruction. - for (unsigned i = 0, e = Values.size(); i != e; ++i) - New->addCase(Values[i], EdgeBB); - - // We added edges from PI to the EdgeBB. As such, if there were any - // PHI nodes in EdgeBB, they need entries to be added corresponding to - // the number of edges added. - for (BasicBlock::iterator BBI = EdgeBB->begin(); - isa<PHINode>(BBI); ++BBI) { - PHINode *PN = cast<PHINode>(BBI); - Value *InVal = PN->getIncomingValueForBlock(*PI); - for (unsigned i = 0, e = Values.size()-1; i != e; ++i) - PN->addIncoming(InVal, *PI); - } + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); - // Erase the old branch instruction. - EraseTerminatorInstAndDCECond(BI); - return true; - } - } + // Remove basic blocks that have no predecessors (except the entry block)... + // or that just have themself as a predecessor. These are unreachable. + if ((pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) || + BB->getSinglePredecessor() == BB) { + DEBUG(dbgs() << "Removing BB: \n" << *BB); + DeleteDeadBlock(BB); + return true; + } + + // Check to see if we can constant propagate this terminator instruction + // away... + Changed |= ConstantFoldTerminator(BB); + + // Check for and eliminate duplicate PHI nodes in this block. 
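
EliminateDuplicatePHINodes, called just below, relies on a PHI being fully determined by its predecessor-to-incoming-value mapping: two PHIs in one block with identical mappings are interchangeable, so the later one can be replaced by the earlier. A toy version of the duplicate detection, with integer ids standing in for blocks and values:

    #include <cassert>
    #include <map>
    #include <utility>
    #include <vector>

    // A "phi" reduced to its (predecessor id, incoming value id) pairs.
    using Phi = std::vector<std::pair<int, int>>;

    int main() {
      std::vector<Phi> phis = {{{1, 7}, {2, 8}},
                               {{1, 9}, {2, 8}},
                               {{1, 7}, {2, 8}}};  // duplicate of phis[0]
      std::map<Phi, int> firstWithShape;
      std::vector<int> replaceWith(phis.size(), -1);
      for (int i = 0; i != (int)phis.size(); ++i) {
        auto ins = firstWithShape.insert({phis[i], i});
        if (!ins.second)
          replaceWith[i] = ins.first->second;  // RAUW with the earlier phi
      }
      assert(replaceWith[1] == -1 && replaceWith[2] == 0);
      return 0;
    }
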
+ Changed |= EliminateDuplicatePHINodes(BB); + + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + // + if (MergeBlockIntoPredecessor(BB)) + return true; + + // If there is a trivial two-entry PHI node in this basic block, and we can + // eliminate it, do so now. + if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN, TD); + + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (BI->isUnconditional()) { + if (SimplifyUncondBranch(BI)) return true; + } else { + if (SimplifyCondBranch(BI)) return true; + } + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + if (SimplifyReturn(RI)) return true; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + if (SimplifySwitch(SI)) return true; + } else if (UnreachableInst *UI = + dyn_cast<UnreachableInst>(BB->getTerminator())) { + if (SimplifyUnreachable(UI)) return true; + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + if (SimplifyUnwind(UI)) return true; + } else if (IndirectBrInst *IBI = + dyn_cast<IndirectBrInst>(BB->getTerminator())) { + if (SimplifyIndirectBr(IBI)) return true; + } return Changed; } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp new file mode 100644 index 0000000..ac005f9 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -0,0 +1,94 @@ +//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility pass used for testing the InstructionSimplify analysis. +// The analysis is applied to every instruction, and if it simplifies then the +// instruction is replaced by the simplification. If you are looking for a pass +// that performs serious instruction folding, use the instcombine pass instead. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instsimplify" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +STATISTIC(NumSimplified, "Number of redundant instructions removed"); + +namespace { + struct InstSimplifier : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstSimplifier() : FunctionPass(ID) { + initializeInstSimplifierPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + + /// runOnFunction - Remove instructions that simplify. 
+ bool runOnFunction(Function &F) { + const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; + bool Changed = false; + + do { + for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), + DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) + for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) { + Instruction *I = BI++; + // The first time through the loop ToSimplify is empty and we try to + // simplify all instructions. On later iterations ToSimplify is not + // empty and we only bother simplifying instructions that are in it. + if (!ToSimplify->empty() && !ToSimplify->count(I)) + continue; + // Don't waste time simplifying unused instructions. + if (!I->use_empty()) + if (Value *V = SimplifyInstruction(I, TD, DT)) { + // Mark all uses for resimplification next time round the loop. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Next->insert(cast<Instruction>(*UI)); + I->replaceAllUsesWith(V); + ++NumSimplified; + Changed = true; + } + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I); + } + + // Place the list of instructions to simplify on the next loop iteration + // into ToSimplify. + std::swap(ToSimplify, Next); + Next->clear(); + } while (!ToSimplify->empty()); + + return Changed; + } + }; +} + +char InstSimplifier::ID = 0; +INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions", + false, false) +char &llvm::InstructionSimplifierID = InstSimplifier::ID; + +// Public interface to the simplify instructions pass. +FunctionPass *llvm::createInstructionSimplifierPass() { + return new InstSimplifier(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index a51f1e1..ccb8287 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -25,7 +25,7 @@ using namespace llvm; char UnifyFunctionExitNodes::ID = 0; INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn", - "Unify function exit nodes", false, false); + "Unify function exit nodes", false, false) Pass *llvm::createUnifyFunctionExitNodesPass() { return new UnifyFunctionExitNodes(); diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp new file mode 100644 index 0000000..24e8c8f --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -0,0 +1,37 @@ +//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the common initialization infrastructure for the +// TransformUtils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeTransformUtils - Initialize all passes in the TransformUtils +/// library. 
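
The runOnFunction above reaches a fixed point with two swapped sets: the first sweep considers every instruction, and each later sweep revisits only the users of values that just simplified. The control structure in isolation, as a standalone toy whose "simplification" is halving even numbers:

    #include <cassert>
    #include <set>
    #include <utility>

    int main() {
      std::set<int> s1 = {8, 12, 5}, s2, *toSimplify = &s1, *next = &s2;
      std::set<int> done;
      do {
        for (int v : *toSimplify) {
          if (v % 2 == 0)
            next->insert(v / 2);   // "users" queued for the next sweep
          else
            done.insert(v);        // nothing left to simplify
        }
        std::swap(toSimplify, next);  // mirrors std::swap(ToSimplify, Next)
        next->clear();
      } while (!toSimplify->empty());
      assert(done.count(1) && done.count(3) && done.count(5));
      return 0;
    }
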
+void llvm::initializeTransformUtils(PassRegistry &Registry) { + initializeBreakCriticalEdgesPass(Registry); + initializeInstNamerPass(Registry); + initializeLCSSAPass(Registry); + initializeLoopSimplifyPass(Registry); + initializeLowerInvokePass(Registry); + initializeLowerSwitchPass(Registry); + initializePromotePassPass(Registry); + initializeUnifyFunctionExitNodesPass(Registry); + initializeInstSimplifierPass(Registry); +} + +/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. +void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) { + initializeTransformUtils(*unwrap(R)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index fc4bde7..f5481d3 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -21,147 +21,111 @@ using namespace llvm; Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, - bool ModuleLevelChanges) { - Value *&VMSlot = VM[V]; - if (VMSlot) return VMSlot; // Does it exist in the map yet? + RemapFlags Flags) { + ValueToValueMapTy::iterator I = VM.find(V); + + // If the value already exists in the map, use it. + if (I != VM.end() && I->second) return I->second; - // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the - // DenseMap. This includes any recursive calls to MapValue. - // Global values do not need to be seeded into the VM if they // are using the identity mapping. - if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) || - (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() && - !ModuleLevelChanges)) - return VMSlot = const_cast<Value*>(V); + if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V)) + return VM[V] = const_cast<Value*>(V); if (const MDNode *MD = dyn_cast<MDNode>(V)) { - // Start by assuming that we'll use the identity mapping. - VMSlot = const_cast<Value*>(V); - + // If this is a module-level metadata and we know that nothing at the module + // level is changing, then use an identity mapping. + if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges)) + return VM[V] = const_cast<Value*>(V); + + // Create a dummy node in case we have a metadata cycle. + MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0); + VM[V] = Dummy; + // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue; + if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue; - // Ok, at least one operand needs remapping. - MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0); - VM[V] = Dummy; + // Ok, at least one operand needs remapping. SmallVector<Value*, 4> Elts; Elts.reserve(MD->getNumOperands()); - for (i = 0; i != e; ++i) - Elts.push_back(MD->getOperand(i) ? - MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0); + for (i = 0; i != e; ++i) { + Value *Op = MD->getOperand(i); + Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0); + } MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size()); Dummy->replaceAllUsesWith(NewMD); + VM[V] = NewMD; MDNode::deleteTemporary(Dummy); - return VM[V] = NewMD; + return NewMD; } - // No operands needed remapping; keep the identity map. + VM[V] = const_cast<Value*>(V); + MDNode::deleteTemporary(Dummy); + + // No operands needed remapping. Use an identity mapping. 
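
The temporary node above exists because metadata graphs can be cyclic: MapValue parks a placeholder in the map before visiting operands, so a recursive visit that loops back finds the placeholder instead of recursing forever; since MDNodes are immutable, the real code must then build a fresh node and replaceAllUsesWith the dummy. On a toy mutable graph the same trick is shorter, because the placeholder itself can become the clone (clones are leaked for brevity):

    #include <cassert>
    #include <map>
    #include <vector>

    struct Node { std::vector<Node*> ops; };

    // Map the node to a placeholder *before* recursing, so cycles resolve
    // to the placeholder rather than looping forever.
    static Node *cloneGraph(Node *n, std::map<Node*, Node*> &vm) {
      std::map<Node*, Node*>::iterator it = vm.find(n);
      if (it != vm.end()) return it->second;
      Node *copy = new Node;   // plays the role of MDNode::getTemporary
      vm[n] = copy;
      for (size_t i = 0; i != n->ops.size(); ++i)
        copy->ops.push_back(cloneGraph(n->ops[i], vm));
      return copy;
    }

    int main() {
      Node a, b;
      a.ops.push_back(&b);
      b.ops.push_back(&a);     // a two-node cycle
      std::map<Node*, Node*> vm;
      Node *ca = cloneGraph(&a, vm);
      assert(ca != &a && ca->ops[0]->ops[0] == ca);  // cycle preserved
      return 0;
    }
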
return const_cast<Value*>(V);
}
+ // Okay, this either must be a constant (which may or may not be mappable) or
+ // is something that is not in the mapping table.
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
if (C == 0) return 0;
- if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
- isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
- isa<UndefValue>(C))
- return VMSlot = C; // Primitive constants map directly
-
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
- for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, ModuleLevelChanges);
- if (MV != *i) {
- // This array must contain a reference to a global, make a new array
- // and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CA->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM,
- ModuleLevelChanges)));
- return VM[V] = ConstantArray::get(CA->getType(), Values);
- }
- }
- return VM[V] = C;
- }
-
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, ModuleLevelChanges);
- if (MV != *i) {
- // This struct must contain a reference to a global, make a new struct
- // and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CS->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM,
- ModuleLevelChanges)));
- return VM[V] = ConstantStruct::get(CS->getType(), Values);
- }
- }
- return VM[V] = C;
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
+ Flags));
+ return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ Value *Op = C->getOperand(i);
+ Value *Mapped = MapValue(Op, VM, Flags);
+ if (Mapped == Op) continue;
+
+ // Okay, the operands don't all match. We've already processed some or all
+ // of the operands, set them up now.
std::vector<Constant*> Ops;
- for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
- return VM[V] = CE->getWithOperands(Ops);
+ Ops.reserve(C->getNumOperands());
+ for (unsigned j = 0; j != i; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+ Ops.push_back(cast<Constant>(Mapped));
+
+ // Map the rest of the operands that aren't processed yet.
+ for (++i; i != e; ++i) + Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags))); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return VM[V] = CE->getWithOperands(Ops); + if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) + return VM[V] = ConstantArray::get(CA->getType(), Ops); + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) + return VM[V] = ConstantStruct::get(CS->getType(), Ops); + assert(isa<ConstantVector>(C) && "Unknown mapped constant type"); + return VM[V] = ConstantVector::get(Ops); } - - if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) { - for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end(); - i != e; ++i) { - Value *MV = MapValue(*i, VM, ModuleLevelChanges); - if (MV != *i) { - // This vector value must contain a reference to a global, make a new - // vector constant and return it. - // - std::vector<Constant*> Values; - Values.reserve(CV->getNumOperands()); - for (User::op_iterator j = b; j != i; ++j) - Values.push_back(cast<Constant>(*j)); - Values.push_back(cast<Constant>(MV)); - for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM, - ModuleLevelChanges))); - return VM[V] = ConstantVector::get(Values); - } - } - return VM[V] = C; - } - - BlockAddress *BA = cast<BlockAddress>(C); - Function *F = cast<Function>(MapValue(BA->getFunction(), VM, - ModuleLevelChanges)); - BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM, - ModuleLevelChanges)); - return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); + + // If we reach here, all of the operands of the constant match. + return VM[V] = C; } /// RemapInstruction - Convert the instruction operands from referencing the /// current values into those specified by VMap. /// void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, - bool ModuleLevelChanges) { + RemapFlags Flags) { // Remap operands. for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, VMap, ModuleLevelChanges); - assert(V && "Referenced value not in value map!"); - *op = V; + Value *V = MapValue(*op, VMap, Flags); + // If we aren't ignoring missing entries, assert that something happened. + if (V != 0) + *op = V; + else + assert((Flags & RF_IgnoreMissingEntries) && + "Referenced value not in value map!"); } // Remap attached metadata. @@ -170,7 +134,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { Value *Old = MI->second; - Value *New = MapValue(Old, VMap, ModuleLevelChanges); + Value *New = MapValue(Old, VMap, Flags); if (New != Old) I->setMetadata(MI->first, cast<MDNode>(New)); } |
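
The constant-remapping loop that now ends MapValue is a copy-on-write scan: nothing is allocated while operands map to themselves, and at the first operand that actually changes it copies the already-checked prefix [0, i), appends the mapped value, and maps the rest. The access pattern in isolation, with a made-up scalar mapping standing in for MapValue:

    #include <cassert>
    #include <vector>

    // Hypothetical mapping: 7 remaps to 70, everything else is identity.
    static int mapOp(int v) { return v == 7 ? 70 : v; }

    static std::vector<int> remapOps(const std::vector<int> &ops) {
      for (size_t i = 0; i != ops.size(); ++i) {
        int mapped = mapOp(ops[i]);
        if (mapped == ops[i]) continue;        // identity: keep scanning
        std::vector<int> out(ops.begin(), ops.begin() + i);  // prefix [0, i)
        out.push_back(mapped);
        for (++i; i != ops.size(); ++i)        // map the unprocessed tail
          out.push_back(mapOp(ops[i]));
        return out;
      }
      return ops;                              // nothing changed
    }

    int main() {
      assert((remapOps({1, 7, 3}) == std::vector<int>{1, 70, 3}));
      assert((remapOps({1, 2, 3}) == std::vector<int>{1, 2, 3}));
      return 0;
    }
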