diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/GVN.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 252 |
1 files changed, 226 insertions, 26 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index f350b9b..6af269d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -21,8 +21,10 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -45,6 +47,7 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include <vector> using namespace llvm; using namespace PatternMatch; @@ -505,7 +508,9 @@ namespace { enum ValType { SimpleVal, // A simple offsetted value that is accessed. LoadVal, // A value produced by a load. - MemIntrin // A memory intrinsic which is loaded from. + MemIntrin, // A memory intrinsic which is loaded from. + UndefVal // A UndefValue representing a value from dead block (which + // is not yet physically removed from the CFG). }; /// V - The value that is live out of the block. @@ -543,10 +548,20 @@ namespace { Res.Offset = Offset; return Res; } - + + static AvailableValueInBlock getUndef(BasicBlock *BB) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(0); + Res.Val.setInt(UndefVal); + Res.Offset = 0; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + bool isUndefValue() const { return Val.getInt() == UndefVal; } Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); @@ -574,6 +589,7 @@ namespace { DominatorTree *DT; const DataLayout *TD; const TargetLibraryInfo *TLI; + SetVector<BasicBlock *> DeadBlocks; ValueTable VN; @@ -692,9 +708,13 @@ namespace { void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); + BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); unsigned replaceAllDominatedUsesWith(Value *From, Value *To, const BasicBlockEdge &Root); bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); + bool processFoldableCondBr(BranchInst *BI); + void addDeadBlock(BasicBlock *BB); + void assignValNumForDeadCode(); }; char GVN::ID = 0; @@ -1068,14 +1088,15 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, if (Offset == -1) return Offset; + unsigned AS = Src->getType()->getPointerAddressSpace(); // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. Src = ConstantExpr::getBitCast(Src, - llvm::Type::getInt8PtrTy(Src->getContext())); + Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); - Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); if (ConstantFoldLoadFromConstPtr(Src, &TD)) return Offset; return -1; @@ -1152,7 +1173,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize*8); DestPTy = PointerType::get(DestPTy, - cast<PointerType>(PtrVal->getType())->getAddressSpace()); + PtrVal->getType()->getPointerAddressSpace()); Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); LoadInst *NewLoad = Builder.CreateLoad(PtrVal); @@ -1227,15 +1248,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, // Otherwise, this is a memcpy/memmove from a constant global. MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); Constant *Src = cast<Constant>(MTI->getSource()); + unsigned AS = Src->getType()->getPointerAddressSpace(); // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. Src = ConstantExpr::getBitCast(Src, - llvm::Type::getInt8PtrTy(Src->getContext())); + Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = - ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); - Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); return ConstantFoldLoadFromConstPtr(Src, &TD); } @@ -1250,8 +1272,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, - LI->getParent())) + LI->getParent())) { + assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); + } // Otherwise, we have to construct SSA form. SmallVector<PHINode*, 8> NewPHIs; @@ -1321,7 +1345,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c << *getCoercedLoadValue() << '\n' << *Res << '\n' << "\n\n\n"); } - } else { + } else if (isMemIntrinValue()) { const DataLayout *TD = gvn.getDataLayout(); assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, @@ -1329,6 +1353,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); + } else { + assert(isUndefValue() && "Should be UndefVal"); + DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); + return UndefValue::get(LoadTy); } return Res; } @@ -1352,6 +1380,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); + if (DeadBlocks.count(DepBB)) { + // Dead dependent mem-op disguise as a load evaluating the same value + // as the load in question. + ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); + continue; + } + if (!DepInfo.isDef() && !DepInfo.isClobber()) { UnavailableBlocks.push_back(DepBB); continue; @@ -1513,7 +1548,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) FullyAvailableBlocks[UnavailableBlocks[i]] = false; - SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit; + SmallVector<BasicBlock *, 4> CriticalEdgePred; for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { BasicBlock *Pred = *PI; @@ -1536,20 +1571,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; } - unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); - NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); + CriticalEdgePred.push_back(Pred); } } - if (!NeedToSplit.empty()) { - toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); - return false; - } - // Decide whether PRE is profitable for this load. unsigned NumUnavailablePreds = PredLoads.size(); assert(NumUnavailablePreds != 0 && - "Fully available value should be eliminated above!"); + "Fully available value should already be eliminated!"); // If this load is unavailable in multiple predecessors, reject it. // FIXME: If we could restructure the CFG, we could make a common pred with @@ -1558,6 +1587,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (NumUnavailablePreds != 1) return false; + // Split critical edges, and update the unavailable predecessors accordingly. + for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(), + E = CriticalEdgePred.end(); I != E; I++) { + BasicBlock *OrigPred = *I; + BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); + PredLoads.erase(OrigPred); + PredLoads[NewPred] = 0; + DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" + << LoadBB->getName() << '\n'); + } + // Check if the load can safely be moved to all the unavailable predecessors. bool CanDoPRE = true; SmallVector<Instruction*, 8> NewInsts; @@ -1594,7 +1634,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (MD) MD->removeInstruction(I); I->eraseFromParent(); } - return false; + // HINT:Don't revert the edge-splitting as following transformation may + // also need to split these critial edges. + return !CriticalEdgePred.empty(); } // Okay, we can eliminate this load by inserting a reload in the predecessor @@ -2181,11 +2223,13 @@ bool GVN::processInstruction(Instruction *I) { // For conditional branches, we can perform simple conditional propagation on // the condition value itself. if (BranchInst *BI = dyn_cast<BranchInst>(I)) { - if (!BI->isConditional() || isa<Constant>(BI->getCondition())) + if (!BI->isConditional()) return false; - Value *BranchCond = BI->getCondition(); + if (isa<Constant>(BI->getCondition())) + return processFoldableCondBr(BI); + Value *BranchCond = BI->getCondition(); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); // Avoid multiple edges early. @@ -2297,25 +2341,30 @@ bool GVN::runOnFunction(Function& F) { while (ShouldContinue) { DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); ShouldContinue = iterateOnFunction(F); - if (splitCriticalEdges()) - ShouldContinue = true; Changed |= ShouldContinue; ++Iteration; } if (EnablePRE) { + // Fabricate val-num for dead-code in order to suppress assertion in + // performPRE(). + assignValNumForDeadCode(); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); Changed |= PREChanged; } } + // FIXME: Should perform GVN again after PRE does something. PRE can move // computations into blocks where they become fully redundant. Note that // we can't do this until PRE's critical edge splitting updates memdep. // Actually, when this happens, we should just fully integrate PRE into GVN. cleanupGlobalSets(); + // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each + // iteration. + DeadBlocks.clear(); return Changed; } @@ -2326,6 +2375,9 @@ bool GVN::processBlock(BasicBlock *BB) { // (and incrementing BI before processing an instruction). assert(InstrsToErase.empty() && "We expect InstrsToErase to be empty across iterations"); + if (DeadBlocks.count(BB)) + return false; + bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); @@ -2344,7 +2396,7 @@ bool GVN::processBlock(BasicBlock *BB) { if (!AtStart) --BI; - for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(), + for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(), E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); @@ -2543,6 +2595,15 @@ bool GVN::performPRE(Function &F) { return Changed; } +/// Split the critical edge connecting the given two blocks, and return +/// the block inserted to the critical edge. +BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { + BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this); + if (MD) + MD->invalidateCachedPredecessors(); + return BB; +} + /// splitCriticalEdges - Split critical edges found during the previous /// iteration that may enable further optimization. bool GVN::splitCriticalEdges() { @@ -2569,9 +2630,18 @@ bool GVN::iterateOnFunction(Function &F) { RE = RPOT.end(); RI != RE; ++RI) Changed |= processBlock(*RI); #else + // Save the blocks this function have before transformation begins. GVN may + // split critical edge, and hence may invalidate the RPO/DT iterator. + // + std::vector<BasicBlock *> BBVect; + BBVect.reserve(256); for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) - Changed |= processBlock(DI->getBlock()); + BBVect.push_back(DI->getBlock()); + + for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end(); + I != E; I++) + Changed |= processBlock(*I); #endif return Changed; @@ -2601,3 +2671,133 @@ void GVN::verifyRemoved(const Instruction *Inst) const { } } } + +// BB is declared dead, which implied other blocks become dead as well. This +// function is to add all these blocks to "DeadBlocks". For the dead blocks' +// live successors, update their phi nodes by replacing the operands +// corresponding to dead blocks with UndefVal. +// +void GVN::addDeadBlock(BasicBlock *BB) { + SmallVector<BasicBlock *, 4> NewDead; + SmallSetVector<BasicBlock *, 4> DF; + + NewDead.push_back(BB); + while (!NewDead.empty()) { + BasicBlock *D = NewDead.pop_back_val(); + if (DeadBlocks.count(D)) + continue; + + // All blocks dominated by D are dead. + SmallVector<BasicBlock *, 8> Dom; + DT->getDescendants(D, Dom); + DeadBlocks.insert(Dom.begin(), Dom.end()); + + // Figure out the dominance-frontier(D). + for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(), + E = Dom.end(); I != E; I++) { + BasicBlock *B = *I; + for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) { + BasicBlock *S = *SI; + if (DeadBlocks.count(S)) + continue; + + bool AllPredDead = true; + for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++) + if (!DeadBlocks.count(*PI)) { + AllPredDead = false; + break; + } + + if (!AllPredDead) { + // S could be proved dead later on. That is why we don't update phi + // operands at this moment. + DF.insert(S); + } else { + // While S is not dominated by D, it is dead by now. This could take + // place if S already have a dead predecessor before D is declared + // dead. + NewDead.push_back(S); + } + } + } + } + + // For the dead blocks' live successors, update their phi nodes by replacing + // the operands corresponding to dead blocks with UndefVal. + for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end(); + I != E; I++) { + BasicBlock *B = *I; + if (DeadBlocks.count(B)) + continue; + + SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B)); + for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; PI++) { + BasicBlock *P = *PI; + + if (!DeadBlocks.count(P)) + continue; + + if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) { + if (BasicBlock *S = splitCriticalEdges(P, B)) + DeadBlocks.insert(P = S); + } + + for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) { + PHINode &Phi = cast<PHINode>(*II); + Phi.setIncomingValue(Phi.getBasicBlockIndex(P), + UndefValue::get(Phi.getType())); + } + } + } +} + +// If the given branch is recognized as a foldable branch (i.e. conditional +// branch with constant condition), it will perform following analyses and +// transformation. +// 1) If the dead out-coming edge is a critical-edge, split it. Let +// R be the target of the dead out-coming edge. +// 1) Identify the set of dead blocks implied by the branch's dead outcoming +// edge. The result of this step will be {X| X is dominated by R} +// 2) Identify those blocks which haves at least one dead prodecessor. The +// result of this step will be dominance-frontier(R). +// 3) Update the PHIs in DF(R) by replacing the operands corresponding to +// dead blocks with "UndefVal" in an hope these PHIs will optimized away. +// +// Return true iff *NEW* dead code are found. +bool GVN::processFoldableCondBr(BranchInst *BI) { + if (!BI || BI->isUnconditional()) + return false; + + ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); + if (!Cond) + return false; + + BasicBlock *DeadRoot = Cond->getZExtValue() ? + BI->getSuccessor(1) : BI->getSuccessor(0); + if (DeadBlocks.count(DeadRoot)) + return false; + + if (!DeadRoot->getSinglePredecessor()) + DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); + + addDeadBlock(DeadRoot); + return true; +} + +// performPRE() will trigger assert if it come across an instruciton without +// associated val-num. As it normally has far more live instructions than dead +// instructions, it makes more sense just to "fabricate" a val-number for the +// dead code than checking if instruction involved is dead or not. +void GVN::assignValNumForDeadCode() { + for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(), + E = DeadBlocks.end(); I != E; I++) { + BasicBlock *BB = *I; + for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); + II != EE; II++) { + Instruction *Inst = &*II; + unsigned ValNum = VN.lookup_or_add(Inst); + addToLeaderTable(ValNum, Inst, BB); + } + } +} |