Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--   lib/Transforms/Scalar/CodeGenPrepare.cpp          9
-rw-r--r--   lib/Transforms/Scalar/GVN.cpp                    391
-rw-r--r--   lib/Transforms/Scalar/GlobalMerge.cpp              5
-rw-r--r--   lib/Transforms/Scalar/LoopRotation.cpp            43
-rw-r--r--   lib/Transforms/Scalar/Reassociate.cpp             35
-rw-r--r--   lib/Transforms/Scalar/SROA.cpp                    31
-rw-r--r--   lib/Transforms/Scalar/ScalarReplAggregates.cpp    17
7 files changed, 269 insertions, 262 deletions
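One note on the CodeGenPrepare hunk that follows: it swaps the SunkAddrs cache from DenseMap to ValueMap. A ValueMap attaches callback handles to its keys, so an entry is dropped automatically when the keyed Value is deleted and is re-keyed when the Value is RAUW'd; that is what makes the manual "SunkAddrs[Addr] = 0" invalidation removable in the hunk below. The sketch here is illustrative only and is not part of the patch; the helper name lookupSunkAddr is invented, and the include paths assume this era of the tree.

#include "llvm/ADT/ValueMap.h"   // ValueMap lives under ADT at this revision
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical helper: look up a previously sunk address computation for Addr.
// Because ValueMap tracks its keys with callback handles, a key that has been
// deleted or replaced elsewhere cannot leave a stale entry behind, so this
// query never hands back a dangling pointer, unlike a plain DenseMap cache.
static Value *lookupSunkAddr(ValueMap<Value*, Value*> &SunkAddrs, Value *Addr) {
  ValueMap<Value*, Value*>::iterator It = SunkAddrs.find(Addr);
  return It != SunkAddrs.end() ? It->second : 0;
}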
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 015fd2e..f0d29c8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
 #include "llvm/Analysis/DominatorInternals.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -88,7 +89,7 @@ namespace {
     /// Keeps track of non-local addresses that have been sunk into a block.
     /// This allows us to avoid inserting duplicate code for blocks with
     /// multiple load/stores of the same address.
-    DenseMap<Value*, Value*> SunkAddrs;
+    ValueMap<Value*, Value*> SunkAddrs;

     /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
     /// be updated.
@@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       // start of the block.
       CurInstIterator = BB->begin();
       SunkAddrs.clear();
-    } else {
-      // This address is now available for reassignment, so erase the table
-      // entry; we don't want to match some completely different instruction.
-      SunkAddrs[Addr] = 0;
     }
   }
   ++NumMemoryInsts;
@@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   if (!DefIsLiveOut)
     return false;

-  // Make sure non of the uses are PHI nodes.
+  // Make sure none of the uses are PHI nodes.
   for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
        UI != E; ++UI) {
     Instruction *User = cast<Instruction>(*UI);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 129af8d..f350b9b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const {
 //===----------------------------------------------------------------------===//

 namespace {
+  class GVN;
+  struct AvailableValueInBlock {
+    /// BB - The basic block in question.
+    BasicBlock *BB;
+    enum ValType {
+      SimpleVal,  // A simple offsetted value that is accessed.
+      LoadVal,    // A value produced by a load.
+      MemIntrin   // A memory intrinsic which is loaded from.
+    };
+
+    /// V - The value that is live out of the block.
+    PointerIntPair<Value *, 2, ValType> Val;
+
+    /// Offset - The byte offset in Val that is interesting for the load query.
+    unsigned Offset;
+
+    static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+                                     unsigned Offset = 0) {
+      AvailableValueInBlock Res;
+      Res.BB = BB;
+      Res.Val.setPointer(V);
+      Res.Val.setInt(SimpleVal);
+      Res.Offset = Offset;
+      return Res;
+    }
+
+    static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+                                       unsigned Offset = 0) {
+      AvailableValueInBlock Res;
+      Res.BB = BB;
+      Res.Val.setPointer(MI);
+      Res.Val.setInt(MemIntrin);
+      Res.Offset = Offset;
+      return Res;
+    }
+
+    static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+                                         unsigned Offset = 0) {
+      AvailableValueInBlock Res;
+      Res.BB = BB;
+      Res.Val.setPointer(LI);
+      Res.Val.setInt(LoadVal);
+      Res.Offset = Offset;
+      return Res;
+    }
+
+    bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+    bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+    bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+    Value *getSimpleValue() const {
+      assert(isSimpleValue() && "Wrong accessor");
+      return Val.getPointer();
+    }
+
+    LoadInst *getCoercedLoadValue() const {
+      assert(isCoercedLoadValue() && "Wrong accessor");
+      return cast<LoadInst>(Val.getPointer());
+    }
+
+    MemIntrinsic *getMemIntrinValue() const {
+      assert(isMemIntrinValue() && "Wrong accessor");
+      return cast<MemIntrinsic>(Val.getPointer());
+    }
+
+    /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+    /// defined here to the specified type. This handles various coercion cases.
+    Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+  };

   class GVN : public FunctionPass {
     bool NoLoads;
@@ -519,6 +588,11 @@ namespace {
     BumpPtrAllocator TableAllocator;

     SmallVector<Instruction*, 8> InstrsToErase;
+
+    typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
+    typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
+    typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
+
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit GVN(bool noloads = false)
@@ -599,11 +673,17 @@
     }

-    // Helper fuctions
-    // FIXME: eliminate or document these better
+    // Helper fuctions of redundant load elimination
     bool processLoad(LoadInst *L);
-    bool processInstruction(Instruction *I);
     bool processNonLocalLoad(LoadInst *L);
+    void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+                                 AvailValInBlkVect &ValuesPerBlock,
+                                 UnavailBlkVect &UnavailableBlocks);
+    bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+                        UnavailBlkVect &UnavailableBlocks);
+
+    // Other helper routines
+    bool processInstruction(Instruction *I);
     bool processBlock(BasicBlock *BB);
     void dump(DenseMap<uint32_t, Value*> &d);
     bool iterateOnFunction(Function &F);
@@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
   return ConstantFoldLoadFromConstPtr(Src, &TD);
 }

-namespace {
-
-struct AvailableValueInBlock {
-  /// BB - The basic block in question.
-  BasicBlock *BB;
-  enum ValType {
-    SimpleVal,  // A simple offsetted value that is accessed.
-    LoadVal,    // A value produced by a load.
-    MemIntrin   // A memory intrinsic which is loaded from.
-  };
-
-  /// V - The value that is live out of the block.
-  PointerIntPair<Value *, 2, ValType> Val;
-
-  /// Offset - The byte offset in Val that is interesting for the load query.
-  unsigned Offset;
-
-  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
-                                   unsigned Offset = 0) {
-    AvailableValueInBlock Res;
-    Res.BB = BB;
-    Res.Val.setPointer(V);
-    Res.Val.setInt(SimpleVal);
-    Res.Offset = Offset;
-    return Res;
-  }
-
-  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
-                                     unsigned Offset = 0) {
-    AvailableValueInBlock Res;
-    Res.BB = BB;
-    Res.Val.setPointer(MI);
-    Res.Val.setInt(MemIntrin);
-    Res.Offset = Offset;
-    return Res;
-  }
-
-  static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
-                                       unsigned Offset = 0) {
-    AvailableValueInBlock Res;
-    Res.BB = BB;
-    Res.Val.setPointer(LI);
-    Res.Val.setInt(LoadVal);
-    Res.Offset = Offset;
-    return Res;
-  }
-
-  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
-  bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
-  bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
-
-  Value *getSimpleValue() const {
-    assert(isSimpleValue() && "Wrong accessor");
-    return Val.getPointer();
-  }
-
-  LoadInst *getCoercedLoadValue() const {
-    assert(isCoercedLoadValue() && "Wrong accessor");
-    return cast<LoadInst>(Val.getPointer());
-  }
-
-  MemIntrinsic *getMemIntrinValue() const {
-    assert(isMemIntrinValue() && "Wrong accessor");
-    return cast<MemIntrinsic>(Val.getPointer());
-  }
-
-  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
-  /// defined here to the specified type. This handles various coercion cases.
-  Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
-    Value *Res;
-    if (isSimpleValue()) {
-      Res = getSimpleValue();
-      if (Res->getType() != LoadTy) {
-        const DataLayout *TD = gvn.getDataLayout();
-        assert(TD && "Need target data to handle type mismatch case");
-        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
-                                   *TD);
-
-        DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
-                     << *getSimpleValue() << '\n'
-                     << *Res << '\n' << "\n\n\n");
-      }
-    } else if (isCoercedLoadValue()) {
-      LoadInst *Load = getCoercedLoadValue();
-      if (Load->getType() == LoadTy && Offset == 0) {
-        Res = Load;
-      } else {
-        Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
-                                  gvn);
-
-        DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
-                     << *getCoercedLoadValue() << '\n'
-                     << *Res << '\n' << "\n\n\n");
-      }
-    } else {
-      const DataLayout *TD = gvn.getDataLayout();
-      assert(TD && "Need target data to handle type mismatch case");
-      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
-                                   LoadTy, BB->getTerminator(), *TD);
-      DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
-                   << " " << *getMemIntrinValue() << '\n'
-                   << *Res << '\n' << "\n\n\n");
-    }
-    return Res;
-  }
-};
-
-} // end anonymous namespace

 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
 /// construct SSA form, allowing us to eliminate LI. This returns the value
@@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   return V;
 }

+Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+  Value *Res;
+  if (isSimpleValue()) {
+    Res = getSimpleValue();
+    if (Res->getType() != LoadTy) {
+      const DataLayout *TD = gvn.getDataLayout();
+      assert(TD && "Need target data to handle type mismatch case");
+      Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+                                 *TD);
+
+      DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+                   << *getSimpleValue() << '\n'
+                   << *Res << '\n' << "\n\n\n");
+    }
+  } else if (isCoercedLoadValue()) {
+    LoadInst *Load = getCoercedLoadValue();
+    if (Load->getType() == LoadTy && Offset == 0) {
+      Res = Load;
+    } else {
+      Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+                                gvn);
+
+      DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+                   << *getCoercedLoadValue() << '\n'
+                   << *Res << '\n' << "\n\n\n");
+    }
+  } else {
+    const DataLayout *TD = gvn.getDataLayout();
+    assert(TD && "Need target data to handle type mismatch case");
+    Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+                                 LoadTy, BB->getTerminator(), *TD);
+    DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+                 << " " << *getMemIntrinValue() << '\n'
+                 << *Res << '\n' << "\n\n\n");
+  }
+  return Res;
+}
+
 static bool isLifetimeStart(const Instruction *Inst) {
   if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
     return II->getIntrinsicID() == Intrinsic::lifetime_start;
   return false;
 }

-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
-/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI) {
-  // Find the non-local dependencies of the load.
-  SmallVector<NonLocalDepResult, 64> Deps;
-  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
-  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
-  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
-  //             << Deps.size() << *LI << '\n');
-
-  // If we had to process more than one hundred blocks to find the
-  // dependencies, this load isn't worth worrying about. Optimizing
-  // it will be too expensive.
-  unsigned NumDeps = Deps.size();
-  if (NumDeps > 100)
-    return false;
-
-  // If we had a phi translation failure, we'll have a single entry which is a
-  // clobber in the current block. Reject this early.
-  if (NumDeps == 1 &&
-      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
-    DEBUG(
-      dbgs() << "GVN: non-local load ";
-      WriteAsOperand(dbgs(), LI);
-      dbgs() << " has unknown dependencies\n";
-    );
-    return false;
-  }
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+                                  AvailValInBlkVect &ValuesPerBlock,
+                                  UnavailBlkVect &UnavailableBlocks) {

   // Filter out useless results (non-locals, etc). Keep track of the blocks
   // where we have a value available in repl, also keep track of whether we see
   // dependencies that produce an unknown value for the load (such as a call
   // that could potentially clobber the load).
-  SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
-  SmallVector<BasicBlock*, 64> UnavailableBlocks;
-
+  unsigned NumDeps = Deps.size();
   for (unsigned i = 0, e = NumDeps; i != e; ++i) {
     BasicBlock *DepBB = Deps[i].getBB();
     MemDepResult DepInfo = Deps[i].getResult();
@@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     }

     UnavailableBlocks.push_back(DepBB);
-    continue;
   }
+}
-
-  // If we have no predecessors that produce a known value for this load, exit
-  // early.
-  if (ValuesPerBlock.empty()) return false;
-
-  // If all of the instructions we depend on produce a known value for this
-  // load, then it is fully redundant and we can use PHI insertion to compute
-  // its value. Insert PHIs and remove the fully redundant value now.
-  if (UnavailableBlocks.empty()) {
-    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
-    // Perform PHI construction.
-    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
-    LI->replaceAllUsesWith(V);
-
-    if (isa<PHINode>(V))
-      V->takeName(LI);
-    if (V->getType()->getScalarType()->isPointerTy())
-      MD->invalidateCachedPointerInfo(V);
-    markInstructionForDeletion(LI);
-    ++NumGVNLoad;
-    return true;
-  }
-
-  if (!EnablePRE || !EnableLoadPRE)
-    return false;
-
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+                         UnavailBlkVect &UnavailableBlocks) {
   // Okay, we have *some* definitions of the value. This means that the value
   // is available in some of our (transitive) predecessors. Lets think about
   // doing PRE of this load. This will involve inserting a new load into the
@@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;

-  bool allSingleSucc = true;
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     // pointer if it is not available.
     PHITransAddr Address(LI->getPointerOperand(), TD);
     Value *LoadPtr = 0;
-    if (allSingleSucc) {
-      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
-                                                  *DT, NewInsts);
-    } else {
-      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
-      LoadPtr = Address.getAddr();
-    }
+    LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+                                                *DT, NewInsts);

     // If we couldn't find or insert a computation of this phi translated value,
     // we fail PRE.
@@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
       break;
     }

-    // Make sure it is valid to move this load here. We have to watch out for:
-    //  @1 = getelementptr (i8* p, ...
-    //  test p and branch if == 0
-    //  load @1
-    // It is valid to have the getelementptr before the test, even if p can
-    // be 0, as getelementptr only does address arithmetic.
-    // If we are not pushing the value through any multiple-successor blocks
-    // we do not have this case. Otherwise, check that the load is safe to
-    // put anywhere; this can be improved, but should be conservatively safe.
-    if (!allSingleSucc &&
-        // FIXME: REEVALUTE THIS.
-        !isSafeToLoadUnconditionally(LoadPtr,
-                                     UnavailablePred->getTerminator(),
-                                     LI->getAlignment(), TD)) {
-      CanDoPRE = false;
-      break;
-    }
-
     I->second = LoadPtr;
   }
@@ -1714,6 +1649,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
   return true;
 }

+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+  // Step 1: Find the non-local dependencies of the load.
+  LoadDepVect Deps;
+  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+
+  // If we had to process more than one hundred blocks to find the
+  // dependencies, this load isn't worth worrying about. Optimizing
+  // it will be too expensive.
+  unsigned NumDeps = Deps.size();
+  if (NumDeps > 100)
+    return false;
+
+  // If we had a phi translation failure, we'll have a single entry which is a
+  // clobber in the current block. Reject this early.
+  if (NumDeps == 1 &&
+      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+    DEBUG(
+      dbgs() << "GVN: non-local load ";
+      WriteAsOperand(dbgs(), LI);
+      dbgs() << " has unknown dependencies\n";
+    );
+    return false;
+  }
+
+  // Step 2: Analyze the availability of the load
+  AvailValInBlkVect ValuesPerBlock;
+  UnavailBlkVect UnavailableBlocks;
+  AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
+
+  // If we have no predecessors that produce a known value for this load, exit
+  // early.
+  if (ValuesPerBlock.empty())
+    return false;
+
+  // Step 3: Eliminate fully redundancy.
+  //
+  // If all of the instructions we depend on produce a known value for this
+  // load, then it is fully redundant and we can use PHI insertion to compute
+  // its value. Insert PHIs and remove the fully redundant value now.
+  if (UnavailableBlocks.empty()) {
+    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+    // Perform PHI construction.
+    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+    LI->replaceAllUsesWith(V);
+
+    if (isa<PHINode>(V))
+      V->takeName(LI);
+    if (V->getType()->getScalarType()->isPointerTy())
+      MD->invalidateCachedPointerInfo(V);
+    markInstructionForDeletion(LI);
+    ++NumGVNLoad;
+    return true;
+  }
+
+  // Step 4: Eliminate partial redundancy.
+  if (!EnablePRE || !EnableLoadPRE)
+    return false;
+
+  return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+}
+
+
 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
   // Patch the replacement so that it is not more restrictive than the value
   // being replaced.
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 5d02c68..4796eb2 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -200,9 +200,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {
   if (!GV || !GV->hasInitializer()) return;

   // Should be an array of 'i8*'.
-  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-  if (InitList == 0) return;
-
+  const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
     if (const GlobalVariable *G =
         dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e98ae95..14c5655 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -56,8 +56,8 @@ namespace {
     }

     bool runOnLoop(Loop *L, LPPassManager &LPM);
-    void simplifyLoopLatch(Loop *L);
-    bool rotateLoop(Loop *L);
+    bool simplifyLoopLatch(Loop *L);
+    bool rotateLoop(Loop *L, bool SimplifiedLatch);

   private:
     LoopInfo *LI;
@@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
   // Simplify the loop latch before attempting to rotate the header
   // upward. Rotation may not be needed if the loop tail can be folded into the
   // loop exit.
-  simplifyLoopLatch(L);
+  bool SimplifiedLatch = simplifyLoopLatch(L);

   // One loop can be rotated multiple times.
   bool MadeChange = false;
-  while (rotateLoop(L))
+  while (rotateLoop(L, SimplifiedLatch)) {
     MadeChange = true;
-
+    SimplifiedLatch = false;
+  }
   return MadeChange;
 }
@@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
 /// canonical form so downstream passes can handle it.
 ///
 /// I don't believe this invalidates SCEV.
-void LoopRotate::simplifyLoopLatch(Loop *L) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
   BasicBlock *Latch = L->getLoopLatch();
   if (!Latch || Latch->hasAddressTaken())
-    return;
+    return false;

   BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
   if (!Jmp || !Jmp->isUnconditional())
-    return;
+    return false;

   BasicBlock *LastExit = Latch->getSinglePredecessor();
   if (!LastExit || !L->isLoopExiting(LastExit))
-    return;
+    return false;

   BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
   if (!BI)
-    return;
+    return false;

   if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
-    return;
+    return false;

   DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
         << LastExit->getName() << "\n");
@@ -253,10 +254,20 @@
   if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
     DT->eraseNode(Latch);
   Latch->eraseFromParent();
+  return true;
 }

 /// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
   // If the loop has only one block then there is not much to rotate.
   if (L->getBlocks().size() == 1)
     return false;
@@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
   // If the loop latch already contains a branch that leaves the loop then the
   // loop is already rotated.
-  if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+  if (OrigLatch == 0)
+    return false;
+
+  // Rotate if either the loop latch does *not* exit the loop, or if the loop
+  // latch was just simplified.
+  if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
     return false;

   // Check size of original header and reject loop if it is very big or we can't
@@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) {
   ++NumRotated;
   return true;
 }
-
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 7ee4027..a3c241d 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -143,13 +143,9 @@ namespace {
     // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
     // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
     // "z" in the order of X-Y-Z is better than any other orders.
-    class PtrSortFunctor {
-      ArrayRef<XorOpnd> A;
-
-    public:
-      PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {}
-      bool operator()(unsigned LHSIndex, unsigned RHSIndex) {
-        return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank();
+    struct PtrSortFunctor {
+      bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
+        return LHS->getSymbolicRank() < RHS->getSymbolicRank();
       }
     };
   private:
@@ -1199,9 +1195,6 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
   if (X != Opnd2->getSymbolicPart())
     return false;

-  const APInt &C1 = Opnd1->getConstPart();
-  const APInt &C2 = Opnd2->getConstPart();
-
   // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
   int DeadInstNum = 1;
   if (Opnd1->getValue()->hasOneUse())
@@ -1219,6 +1212,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
     if (Opnd2->isOrExpr())
       std::swap(Opnd1, Opnd2);

+    const APInt &C1 = Opnd1->getConstPart();
+    const APInt &C2 = Opnd2->getConstPart();
     APInt C3((~C1) ^ C2);

     // Do not increase code size!
@@ -1234,6 +1229,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
   } else if (Opnd1->isOrExpr()) {
     // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
     //
+    const APInt &C1 = Opnd1->getConstPart();
+    const APInt &C2 = Opnd2->getConstPart();
     APInt C3 = C1 ^ C2;

     // Do not increase code size
@@ -1248,6 +1245,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
   } else {
     // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
     //
+    const APInt &C1 = Opnd1->getConstPart();
+    const APInt &C2 = Opnd2->getConstPart();
     APInt C3 = C1 ^ C2;
     Res = createAndInstr(I, X, C3);
   }
@@ -1274,7 +1273,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
     return 0;

   SmallVector<XorOpnd, 8> Opnds;
-  SmallVector<unsigned, 8> OpndIndices;
+  SmallVector<XorOpnd*, 8> OpndPtrs;
   Type *Ty = Ops[0].Op->getType();
   APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
@@ -1285,23 +1284,29 @@ Value *Reassociate::OptimizeXor(Instruction *I,
       XorOpnd O(V);
       O.setSymbolicRank(getRank(O.getSymbolicPart()));
       Opnds.push_back(O);
-      OpndIndices.push_back(Opnds.size() - 1);
     } else
       ConstOpnd ^= cast<ConstantInt>(V)->getValue();
   }

+  // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
+  // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
+  // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
+  // with the previous loop --- the iterator of the "Opnds" may be invalidated
+  // when new elements are added to the vector.
+  for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
+    OpndPtrs.push_back(&Opnds[i]);
+
   // Step 2: Sort the Xor-Operands in a way such that the operands containing
   // the same symbolic value cluster together. For instance, the input operand
   // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
   // ("x | 123", "x & 789", "y & 456").
-  std::sort(OpndIndices.begin(), OpndIndices.end(),
-            XorOpnd::PtrSortFunctor(Opnds));
+  std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());

   // Step 3: Combine adjacent operands
   XorOpnd *PrevOpnd = 0;
   bool Changed = false;
   for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
-    XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]];
+    XorOpnd *CurrOpnd = OpndPtrs[i];
     // The combined value
     Value *CV;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index f6bb365..d073e78 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2322,17 +2322,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
   V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
                               ConstantVector::get(Mask),
                               Name + ".expand");
-  DEBUG(dbgs() << "    shuffle1: " << *V << "\n");
+  DEBUG(dbgs() << "    shuffle: " << *V << "\n");

   Mask.clear();
   for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
-    if (i >= BeginIndex && i < EndIndex)
-      Mask.push_back(IRB.getInt32(i));
-    else
-      Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
-  V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
-                              Name + "insert");
-  DEBUG(dbgs() << "    shuffle2: " << *V << "\n");
+    Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+
+  V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
+
+  DEBUG(dbgs() << "    blend: " << *V << "\n");
   return V;
 }
@@ -2671,6 +2669,7 @@ private:
     StoreInst *NewSI;
     if (BeginOffset == NewAllocaBeginOffset &&
+        EndOffset == NewAllocaEndOffset &&
         canConvertValue(TD, V->getType(), NewAllocaTy)) {
       V = convertValue(TD, IRB, V, NewAllocaTy);
       NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -3050,16 +3049,16 @@ private:
   bool visitSelectInst(SelectInst &SI) {
     DEBUG(dbgs() << "    original: " << SI << "\n");
-
-    // Find the operand we need to rewrite here.
-    bool IsTrueVal = SI.getTrueValue() == OldPtr;
-    if (IsTrueVal)
-      assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
-    else
-      assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
+           "Pointer isn't an operand!");
     Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
-    SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+    // Replace the operands which were using the old pointer.
+    if (SI.getOperand(1) == OldPtr)
+      SI.setOperand(1, NewPtr);
+    if (SI.getOperand(2) == OldPtr)
+      SI.setOperand(2, NewPtr);
+
     DEBUG(dbgs() << "          to: " << SI << "\n");
     deleteIfTriviallyDead(OldPtr);
     return false;
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e590a37..bfde334 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
 }

 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the alloca instructions in the function, removing them
-// if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the entry block, removing
+// them if they are only used by getelementptr instructions.
 //
 bool SROA::performScalarRepl(Function &F) {
   std::vector<AllocaInst*> WorkList;
@@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
       continue;

     ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
-    if (!IdxVal) {
-      // Non constant GEPs are only a problem on arrays, structs, and pointers
-      // Vectors can be dynamically indexed.
-      // FIXME: Add support for dynamic indexing on arrays. This should be
-      // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
-      // isn't.
-      if (!(*GEPIt)->isVectorTy())
-        return MarkUnsafe(Info, GEPI);
-      NonConstant = true;
-      NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
-    }
+    if (!IdxVal)
+      return MarkUnsafe(Info, GEPI);
   }

   // Compute the offset due to this GEP and check if the alloca has a
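A side note on the Reassociate.cpp change above: OptimizeXor now sorts an array of XorOpnd pointers (OpndPtrs) instead of indices, and the new NOTE comment in that hunk is about pointer invalidation, since growing Opnds after the pointers are taken could reallocate its storage. The sketch below is not taken from the patch; XorOpnd here is a stripped-down stand-in and sortByRank is an invented helper, shown only to illustrate that pattern with plain std::vector.

#include <algorithm>
#include <vector>

struct XorOpnd {                       // stand-in for Reassociate's XorOpnd
  unsigned SymbolicRank;
  unsigned getSymbolicRank() const { return SymbolicRank; }
};

struct PtrSortFunctor {
  bool operator()(XorOpnd *const &LHS, XorOpnd *const &RHS) const {
    return LHS->getSymbolicRank() < RHS->getSymbolicRank();
  }
};

static void sortByRank(std::vector<XorOpnd> &Opnds,
                       std::vector<XorOpnd *> &OpndPtrs) {
  // Take the pointers only after Opnds is fully populated: a later push_back
  // on Opnds could reallocate its storage and leave every entry of OpndPtrs
  // dangling, which is exactly what the NOTE in OptimizeXor guards against.
  OpndPtrs.clear();
  for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
    OpndPtrs.push_back(&Opnds[i]);
  std::sort(OpndPtrs.begin(), OpndPtrs.end(), PtrSortFunctor());
}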