summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/GVN.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp252
1 files changed, 226 insertions, 26 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index f350b9b..6af269d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -21,8 +21,10 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -45,6 +47,7 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <vector>
using namespace llvm;
using namespace PatternMatch;
@@ -505,7 +508,9 @@ namespace {
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
+ MemIntrin, // A memory intrinsic which is loaded from.
+ UndefVal // A UndefValue representing a value from dead block (which
+ // is not yet physically removed from the CFG).
};
/// V - The value that is live out of the block.
@@ -543,10 +548,20 @@ namespace {
Res.Offset = Offset;
return Res;
}
-
+
+ static AvailableValueInBlock getUndef(BasicBlock *BB) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(0);
+ Res.Val.setInt(UndefVal);
+ Res.Offset = 0;
+ return Res;
+ }
+
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+ bool isUndefValue() const { return Val.getInt() == UndefVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
@@ -574,6 +589,7 @@ namespace {
DominatorTree *DT;
const DataLayout *TD;
const TargetLibraryInfo *TLI;
+ SetVector<BasicBlock *> DeadBlocks;
ValueTable VN;
@@ -692,9 +708,13 @@ namespace {
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
+ BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
+ bool processFoldableCondBr(BranchInst *BI);
+ void addDeadBlock(BasicBlock *BB);
+ void assignValNumForDeadCode();
};
char GVN::ID = 0;
@@ -1068,14 +1088,15 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
- llvm::Type::getInt8PtrTy(Src->getContext()));
+ Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
return -1;
@@ -1152,7 +1173,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *DestPTy =
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
DestPTy = PointerType::get(DestPTy,
- cast<PointerType>(PtrVal->getType())->getAddressSpace());
+ PtrVal->getType()->getPointerAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
@@ -1227,15 +1248,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
- llvm::Type::getInt8PtrTy(Src->getContext()));
+ Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
@@ -1250,8 +1272,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
- LI->getParent()))
+ LI->getParent())) {
+ assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block");
return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+ }
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
@@ -1321,7 +1345,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *getCoercedLoadValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
- } else {
+ } else if (isMemIntrinValue()) {
const DataLayout *TD = gvn.getDataLayout();
assert(TD && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
@@ -1329,6 +1353,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
+ } else {
+ assert(isUndefValue() && "Should be UndefVal");
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
+ return UndefValue::get(LoadTy);
}
return Res;
}
@@ -1352,6 +1380,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
+ if (DeadBlocks.count(DepBB)) {
+ // Dead dependent mem-op disguise as a load evaluating the same value
+ // as the load in question.
+ ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
+ continue;
+ }
+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
UnavailableBlocks.push_back(DepBB);
continue;
@@ -1513,7 +1548,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
- SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
+ SmallVector<BasicBlock *, 4> CriticalEdgePred;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
BasicBlock *Pred = *PI;
@@ -1536,20 +1571,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
- NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ CriticalEdgePred.push_back(Pred);
}
}
- if (!NeedToSplit.empty()) {
- toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
- return false;
- }
-
// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
- "Fully available value should be eliminated above!");
+ "Fully available value should already be eliminated!");
// If this load is unavailable in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
@@ -1558,6 +1587,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (NumUnavailablePreds != 1)
return false;
+ // Split critical edges, and update the unavailable predecessors accordingly.
+ for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(),
+ E = CriticalEdgePred.end(); I != E; I++) {
+ BasicBlock *OrigPred = *I;
+ BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
+ PredLoads.erase(OrigPred);
+ PredLoads[NewPred] = 0;
+ DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
+ << LoadBB->getName() << '\n');
+ }
+
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
@@ -1594,7 +1634,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (MD) MD->removeInstruction(I);
I->eraseFromParent();
}
- return false;
+ // HINT:Don't revert the edge-splitting as following transformation may
+ // also need to split these critial edges.
+ return !CriticalEdgePred.empty();
}
// Okay, we can eliminate this load by inserting a reload in the predecessor
@@ -2181,11 +2223,13 @@ bool GVN::processInstruction(Instruction *I) {
// For conditional branches, we can perform simple conditional propagation on
// the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ if (!BI->isConditional())
return false;
- Value *BranchCond = BI->getCondition();
+ if (isa<Constant>(BI->getCondition()))
+ return processFoldableCondBr(BI);
+ Value *BranchCond = BI->getCondition();
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
// Avoid multiple edges early.
@@ -2297,25 +2341,30 @@ bool GVN::runOnFunction(Function& F) {
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
- if (splitCriticalEdges())
- ShouldContinue = true;
Changed |= ShouldContinue;
++Iteration;
}
if (EnablePRE) {
+ // Fabricate val-num for dead-code in order to suppress assertion in
+ // performPRE().
+ assignValNumForDeadCode();
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
Changed |= PREChanged;
}
}
+
// FIXME: Should perform GVN again after PRE does something. PRE can move
// computations into blocks where they become fully redundant. Note that
// we can't do this until PRE's critical edge splitting updates memdep.
// Actually, when this happens, we should just fully integrate PRE into GVN.
cleanupGlobalSets();
+ // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
+ // iteration.
+ DeadBlocks.clear();
return Changed;
}
@@ -2326,6 +2375,9 @@ bool GVN::processBlock(BasicBlock *BB) {
// (and incrementing BI before processing an instruction).
assert(InstrsToErase.empty() &&
"We expect InstrsToErase to be empty across iterations");
+ if (DeadBlocks.count(BB))
+ return false;
+
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
@@ -2344,7 +2396,7 @@ bool GVN::processBlock(BasicBlock *BB) {
if (!AtStart)
--BI;
- for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
+ for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(),
E = InstrsToErase.end(); I != E; ++I) {
DEBUG(dbgs() << "GVN removed: " << **I << '\n');
if (MD) MD->removeInstruction(*I);
@@ -2543,6 +2595,15 @@ bool GVN::performPRE(Function &F) {
return Changed;
}
+/// Split the critical edge connecting the given two blocks, and return
+/// the block inserted to the critical edge.
+BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
+ BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this);
+ if (MD)
+ MD->invalidateCachedPredecessors();
+ return BB;
+}
+
/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
@@ -2569,9 +2630,18 @@ bool GVN::iterateOnFunction(Function &F) {
RE = RPOT.end(); RI != RE; ++RI)
Changed |= processBlock(*RI);
#else
+ // Save the blocks this function have before transformation begins. GVN may
+ // split critical edge, and hence may invalidate the RPO/DT iterator.
+ //
+ std::vector<BasicBlock *> BBVect;
+ BBVect.reserve(256);
for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- Changed |= processBlock(DI->getBlock());
+ BBVect.push_back(DI->getBlock());
+
+ for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
+ I != E; I++)
+ Changed |= processBlock(*I);
#endif
return Changed;
@@ -2601,3 +2671,133 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
}
}
}
+
+// BB is declared dead, which implied other blocks become dead as well. This
+// function is to add all these blocks to "DeadBlocks". For the dead blocks'
+// live successors, update their phi nodes by replacing the operands
+// corresponding to dead blocks with UndefVal.
+//
+void GVN::addDeadBlock(BasicBlock *BB) {
+ SmallVector<BasicBlock *, 4> NewDead;
+ SmallSetVector<BasicBlock *, 4> DF;
+
+ NewDead.push_back(BB);
+ while (!NewDead.empty()) {
+ BasicBlock *D = NewDead.pop_back_val();
+ if (DeadBlocks.count(D))
+ continue;
+
+ // All blocks dominated by D are dead.
+ SmallVector<BasicBlock *, 8> Dom;
+ DT->getDescendants(D, Dom);
+ DeadBlocks.insert(Dom.begin(), Dom.end());
+
+ // Figure out the dominance-frontier(D).
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
+ E = Dom.end(); I != E; I++) {
+ BasicBlock *B = *I;
+ for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
+ BasicBlock *S = *SI;
+ if (DeadBlocks.count(S))
+ continue;
+
+ bool AllPredDead = true;
+ for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
+ if (!DeadBlocks.count(*PI)) {
+ AllPredDead = false;
+ break;
+ }
+
+ if (!AllPredDead) {
+ // S could be proved dead later on. That is why we don't update phi
+ // operands at this moment.
+ DF.insert(S);
+ } else {
+ // While S is not dominated by D, it is dead by now. This could take
+ // place if S already have a dead predecessor before D is declared
+ // dead.
+ NewDead.push_back(S);
+ }
+ }
+ }
+ }
+
+ // For the dead blocks' live successors, update their phi nodes by replacing
+ // the operands corresponding to dead blocks with UndefVal.
+ for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
+ I != E; I++) {
+ BasicBlock *B = *I;
+ if (DeadBlocks.count(B))
+ continue;
+
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
+ for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; PI++) {
+ BasicBlock *P = *PI;
+
+ if (!DeadBlocks.count(P))
+ continue;
+
+ if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
+ if (BasicBlock *S = splitCriticalEdges(P, B))
+ DeadBlocks.insert(P = S);
+ }
+
+ for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
+ PHINode &Phi = cast<PHINode>(*II);
+ Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
+ UndefValue::get(Phi.getType()));
+ }
+ }
+ }
+}
+
+// If the given branch is recognized as a foldable branch (i.e. conditional
+// branch with constant condition), it will perform following analyses and
+// transformation.
+// 1) If the dead out-coming edge is a critical-edge, split it. Let
+// R be the target of the dead out-coming edge.
+// 1) Identify the set of dead blocks implied by the branch's dead outcoming
+// edge. The result of this step will be {X| X is dominated by R}
+// 2) Identify those blocks which haves at least one dead prodecessor. The
+// result of this step will be dominance-frontier(R).
+// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
+// dead blocks with "UndefVal" in an hope these PHIs will optimized away.
+//
+// Return true iff *NEW* dead code are found.
+bool GVN::processFoldableCondBr(BranchInst *BI) {
+ if (!BI || BI->isUnconditional())
+ return false;
+
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ if (!Cond)
+ return false;
+
+ BasicBlock *DeadRoot = Cond->getZExtValue() ?
+ BI->getSuccessor(1) : BI->getSuccessor(0);
+ if (DeadBlocks.count(DeadRoot))
+ return false;
+
+ if (!DeadRoot->getSinglePredecessor())
+ DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
+
+ addDeadBlock(DeadRoot);
+ return true;
+}
+
+// performPRE() will trigger assert if it come across an instruciton without
+// associated val-num. As it normally has far more live instructions than dead
+// instructions, it makes more sense just to "fabricate" a val-number for the
+// dead code than checking if instruction involved is dead or not.
+void GVN::assignValNumForDeadCode() {
+ for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
+ E = DeadBlocks.end(); I != E; I++) {
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
+ II != EE; II++) {
+ Instruction *Inst = &*II;
+ unsigned ValNum = VN.lookup_or_add(Inst);
+ addToLeaderTable(ValNum, Inst, BB);
+ }
+ }
+}
OpenPOWER on IntegriCloud