summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp1507
1 files changed, 1177 insertions, 330 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 518df7c..c767da6 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
@@ -22,6 +23,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
@@ -38,7 +40,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/NoFolder.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <set>
@@ -53,6 +55,13 @@ static cl::opt<bool>
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
+static cl::opt<bool>
+SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
+
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
+STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
@@ -68,10 +77,13 @@ namespace {
// Comparing pointers is ok as we only rely on the order for uniquing.
return Value < RHS.Value;
}
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};
class SimplifyCFGOpt {
- const TargetData *const TD;
+ const DataLayout *const TD;
+ const TargetTransformInfo *const TTI;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -91,7 +103,8 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
+ SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti)
+ : TD(td), TTI(tti) {}
bool run(BasicBlock *BB);
};
}
@@ -101,14 +114,14 @@ public:
///
static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
if (SI1 == SI2) return false; // Can't merge with self!
-
+
// It is not safe to merge these two switch instructions if they have a common
// successor, and if that successor has a PHI node, and if *that* PHI node has
// conflicting incoming values from the two switch blocks.
BasicBlock *SI1BB = SI1->getParent();
BasicBlock *SI2BB = SI2->getParent();
SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
-
+
for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
if (SI1Succs.count(*I))
for (BasicBlock::iterator BBI = (*I)->begin();
@@ -118,7 +131,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
PN->getIncomingValueForBlock(SI2BB))
return false;
}
-
+
return true;
}
@@ -135,7 +148,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
assert(SI1->isUnconditional() && SI2->isConditional());
// We fold the unconditional branch if we can easily update all PHI nodes in
- // common successors:
+ // common successors:
// 1> We have a constant incoming value for the conditional branch;
// 2> We have "Cond" as the incoming value for the unconditional branch;
// 3> SI2->getCondition() and Cond have same operands.
@@ -170,7 +183,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
BasicBlock *ExistPred) {
if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
-
+
PHINode *PN;
for (BasicBlock::iterator I = Succ->begin();
(PN = dyn_cast<PHINode>(I)); ++I)
@@ -222,7 +235,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// doesn't dominate BB.
if (Pred2->getSinglePredecessor() == 0)
return 0;
-
+
// If we found a conditional branch predecessor, make sure that it branches
// to BB and Pred2Br. If it doesn't, this isn't an "if statement".
if (Pred1Br->getSuccessor(0) == BB &&
@@ -252,7 +265,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// Otherwise, if this is a conditional branch, then we can use it!
BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
if (BI == 0) return 0;
-
+
assert(BI->isConditional() && "Two successors but not conditional?");
if (BI->getSuccessor(0) == Pred1) {
IfTrue = Pred1;
@@ -345,7 +358,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// If we aren't allowing aggressive promotion anymore, then don't consider
// instructions in the 'if region'.
if (AggressiveInsts == 0) return false;
-
+
// If we have seen this instruction before, don't count it again.
if (AggressiveInsts->count(I)) return true;
@@ -374,7 +387,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
+static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
@@ -382,7 +395,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+ IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType()));
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -408,10 +421,10 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
/// Values vector.
static Value *
GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
- const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+ const DataLayout *TD, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
-
+
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
@@ -420,21 +433,21 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Vals.push_back(C);
return I->getOperand(0);
}
-
+
// If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
// the set.
ConstantRange Span =
ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
-
+
// If this is an and/!= check then we want to optimize "x ugt 2" into
// x != 0 && x != 1.
if (!isEQ)
Span = Span.inverse();
-
+
// If there are a ton of values, we don't want to make a ginormous switch.
if (Span.getSetSize().ugt(8) || Span.isEmptySet())
return 0;
-
+
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
UsedICmps++;
@@ -442,11 +455,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
}
return 0;
}
-
+
// Otherwise, we can only handle an | or &, depending on isEQ.
if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
return 0;
-
+
unsigned NumValsBeforeLHS = Vals.size();
unsigned UsedICmpsBeforeLHS = UsedICmps;
if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
@@ -467,12 +480,12 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Extra = I->getOperand(1);
return LHS;
}
-
+
Vals.resize(NumValsBeforeLHS);
UsedICmps = UsedICmpsBeforeLHS;
return 0;
}
-
+
// If the LHS can't be folded in, but Extra is available and RHS can, try to
// use LHS as Extra.
if (Extra == 0 || Extra == I->getOperand(0)) {
@@ -484,7 +497,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
assert(Vals.size() == NumValsBeforeLHS);
Extra = OldExtra;
}
-
+
return 0;
}
@@ -556,11 +569,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
/// in the list that match the specified block.
static void EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- for (unsigned i = 0, e = Cases.size(); i != e; ++i)
- if (Cases[i].Dest == BB) {
- Cases.erase(Cases.begin()+i);
- --i; --e;
- }
+ Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
}
/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
@@ -615,6 +624,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
assert(ThisVal && "This isn't a value comparison!!");
if (ThisVal != PredVal) return false; // Different predicates.
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
// Find out information about when control will move from Pred to TI's block.
std::vector<ValueEqualityComparisonCase> PredCases;
BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
@@ -634,7 +646,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// can simplify TI.
if (!ValuesOverlap(PredCases, ThisCases))
return false;
-
+
if (isa<BranchInst>(TI)) {
// Okay, one of the successors of this condbr is dead. Convert it to a
// uncond br.
@@ -652,7 +664,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
EraseTerminatorInstAndDCECond(TI);
return true;
}
-
+
SwitchInst *SI = cast<SwitchInst>(TI);
// Okay, TI has cases that are statically dead, prune them away.
SmallPtrSet<Constant*, 16> DeadCases;
@@ -662,18 +674,37 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
+ if (HasWeight)
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
if (DeadCases.count(i.getCaseValue())) {
+ if (HasWeight) {
+ std::swap(Weights[i.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
i.getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
}
+ if (HasWeight && Weights.size() >= 2)
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(Weights));
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
-
+
// Otherwise, TI's block must correspond to some matched value. Find out
// which value (or set of values) this is.
ConstantInt *TIV = 0;
@@ -729,8 +760,8 @@ namespace {
}
static int ConstantIntSortPredicate(const void *P1, const void *P2) {
- const ConstantInt *LHS = *(const ConstantInt**)P1;
- const ConstantInt *RHS = *(const ConstantInt**)P2;
+ const ConstantInt *LHS = *(const ConstantInt*const*)P1;
+ const ConstantInt *RHS = *(const ConstantInt*const*)P2;
if (LHS->getValue().ult(RHS->getValue()))
return 1;
if (LHS->getValue() == RHS->getValue())
@@ -738,6 +769,56 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) {
return -1;
}
+static inline bool HasBranchWeights(const Instruction* I) {
+ MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof);
+ if (ProfMD && ProfMD->getOperand(0))
+ if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
+ return MDS->getString().equals("branch_weights");
+
+ return false;
+}
+
+/// Get Weights of a given TerminatorInst, the default weight is at the front
+/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
+/// metadata.
+static void GetBranchWeights(TerminatorInst *TI,
+ SmallVectorImpl<uint64_t> &Weights) {
+ MDNode* MD = TI->getMetadata(LLVMContext::MD_prof);
+ assert(MD);
+ for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+
+ // If TI is a conditional eq, the default case is the false case,
+ // and the corresponding branch-weight data is at index 2. We swap the
+ // default weight to be the first entry.
+ if (BranchInst* BI = dyn_cast<BranchInst>(TI)) {
+ assert(Weights.size() == 2);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(Weights.front(), Weights.back());
+ }
+}
+
+/// Sees if any of the weights are too big for a uint32_t, and halves all the
+/// weights if any are.
+static void FitWeights(MutableArrayRef<uint64_t> Weights) {
+ bool Halve = false;
+ for (unsigned i = 0; i < Weights.size(); ++i)
+ if (Weights[i] > UINT_MAX) {
+ Halve = true;
+ break;
+ }
+
+ if (! Halve)
+ return;
+
+ for (unsigned i = 0; i < Weights.size(); ++i)
+ Weights[i] /= 2;
+}
+
/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
/// equality comparison instruction (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
@@ -770,6 +851,31 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// build.
SmallVector<BasicBlock*, 8> NewSuccessors;
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights;
+ bool PredHasWeights = HasBranchWeights(PTI);
+ bool SuccHasWeights = HasBranchWeights(TI);
+
+ if (PredHasWeights) {
+ GetBranchWeights(PTI, Weights);
+ // branch-weight metadata is inconsistant here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
+ // If there are no predecessor weights but there are successor weights,
+ // populate Weights with 1, which will later be scaled to the sum of
+ // successor's weights
+ Weights.assign(1 + PredCases.size(), 1);
+
+ SmallVector<uint64_t, 8> SuccWeights;
+ if (SuccHasWeights) {
+ GetBranchWeights(TI, SuccWeights);
+ // branch-weight metadata is inconsistant here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1);
+
if (PredDefault == BB) {
// If this is the default destination from PTI, only the edges in TI
// that don't occur in PTI, or that branch to BB will be activated.
@@ -780,6 +886,14 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
else {
// The default destination is BB, we don't need explicit targets.
std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
PredCases.pop_back();
--i; --e;
}
@@ -790,21 +904,47 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
PredDefault = BBDefault;
NewSuccessors.push_back(BBDefault);
}
+
+ unsigned CasesFromPred = Weights.size();
+ uint64_t ValidTotalSuccWeight = 0;
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
if (!PTIHandled.count(BBCases[i].Value) &&
BBCases[i].Dest != BBDefault) {
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
+ if (SuccHasWeights || PredHasWeights) {
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i+1]);
+ ValidTotalSuccWeight += SuccWeights[i+1];
+ }
}
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
} else {
// If this is not the default destination from PSI, only the edges
// in SI that occur in PSI with a destination of BB will be
// activated.
std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt*, uint64_t> WeightsForHandled;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest == BB) {
PTIHandled.insert(PredCases[i].Value);
+
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
std::swap(PredCases[i], PredCases.back());
PredCases.pop_back();
--i; --e;
@@ -815,6 +955,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
if (PTIHandled.count(BBCases[i].Value)) {
// If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
PTIHandled.erase(BBCases[i].Value);// This constant is taken care of
@@ -822,9 +964,11 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// If there are any constants vectored to BB that TI doesn't handle,
// they must go to the default destination of TI.
- for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
PTIHandled.begin(),
E = PTIHandled.end(); I != E; ++I) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[*I]);
PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
NewSuccessors.push_back(BBDefault);
}
@@ -839,7 +983,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without TargetData");
+ assert(TD && "Cannot switch on pointer without DataLayout");
CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
"magicptr");
}
@@ -851,6 +995,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
NewSI->addCase(PredCases[i].Value, PredCases[i].Dest);
+ if (PredHasWeights || SuccHasWeights) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(Weights);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+
+ NewSI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
EraseTerminatorInstAndDCECond(PTI);
// Okay, last check. If BB is still a successor of PSI, then we must
@@ -984,11 +1139,11 @@ HoistTerminator:
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
if (BB1V == BB2V) continue;
-
+
// These values do not agree. Insert a select instruction before NT
// that determines the right value.
SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (SI == 0)
+ if (SI == 0)
SI = cast<SelectInst>
(Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
BB1V->getName()+"."+BB2V->getName()));
@@ -1008,6 +1163,175 @@ HoistTerminator:
return true;
}
+/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BB1 = BI1->getParent();
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+ // Check that BBEnd has two predecessors and the other predecessor ends with
+ // an unconditional branch.
+ pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
+ BasicBlock *Pred0 = *PI++;
+ if (PI == PE) // Only one predecessor.
+ return false;
+ BasicBlock *Pred1 = *PI++;
+ if (PI != PE) // More than two predecessors.
+ return false;
+ BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
+ BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
+ if (!BI2 || !BI2->isUnconditional())
+ return false;
+
+ // Gather the PHI nodes in BBEnd.
+ std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
+ Instruction *FirstNonPhiInBBEnd = 0;
+ for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
+ } else {
+ FirstNonPhiInBBEnd = &*I;
+ break;
+ }
+ }
+ if (!FirstNonPhiInBBEnd)
+ return false;
+
+
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. We scan backward for obviously identical
+ // instructions in an identical order.
+ BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
+ RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(),
+ RE2 = BB2->getInstList().rend();
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return false;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return false;
+ // Skip the unconditional branches.
+ ++RI1;
+ ++RI2;
+
+ bool Changed = false;
+ while (RI1 != RE1 && RI2 != RE2) {
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return Changed;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return Changed;
+
+ Instruction *I1 = &*RI1, *I2 = &*RI2;
+ // I1 and I2 should have a single use in the same PHI node, and they
+ // perform the same operation.
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
+ isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
+ isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+ isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
+ I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
+ I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
+ !I1->hasOneUse() || !I2->hasOneUse() ||
+ MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() ||
+ MapValueFromBB1ToBB2[I1].first != I2)
+ return Changed;
+
+ // Check whether we should swap the operands of ICmpInst.
+ ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
+ bool SwapOpnds = false;
+ if (ICmp1 && ICmp2 &&
+ ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
+ ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
+ (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
+ ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
+ ICmp2->swapOperands();
+ SwapOpnds = true;
+ }
+ if (!I1->isSameOperationAs(I2)) {
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+
+ // The operands should be either the same or they need to be generated
+ // with a PHI node after sinking. We only handle the case where there is
+ // a single pair of different operands.
+ Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+ unsigned Op1Idx = 0;
+ for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
+ if (I1->getOperand(I) == I2->getOperand(I))
+ continue;
+ // Early exit if we have more-than one pair of different operands or
+ // the different operand is already in MapValueFromBB1ToBB2.
+ // Early exit if we need a PHI node to replace a constant.
+ if (DifferentOp1 ||
+ MapValueFromBB1ToBB2.find(I1->getOperand(I)) !=
+ MapValueFromBB1ToBB2.end() ||
+ isa<Constant>(I1->getOperand(I)) ||
+ isa<Constant>(I2->getOperand(I))) {
+ // If we can't sink the instructions, undo the swapping.
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+ DifferentOp1 = I1->getOperand(I);
+ Op1Idx = I;
+ DifferentOp2 = I2->getOperand(I);
+ }
+
+ // We insert the pair of different operands to MapValueFromBB1ToBB2 and
+ // remove (I1, I2) from MapValueFromBB1ToBB2.
+ if (DifferentOp1) {
+ PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2,
+ DifferentOp1->getName() + ".sink",
+ BBEnd->begin());
+ MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN);
+ // I1 should use NewPN instead of DifferentOp1.
+ I1->setOperand(Op1Idx, NewPN);
+ NewPN->addIncoming(DifferentOp1, BB1);
+ NewPN->addIncoming(DifferentOp2, BB2);
+ DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+ }
+ PHINode *OldPN = MapValueFromBB1ToBB2[I1].second;
+ MapValueFromBB1ToBB2.erase(I1);
+
+ DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";);
+ DEBUG(dbgs() << " " << *I2 << "\n";);
+ // We need to update RE1 and RE2 if we are going to sink the first
+ // instruction in the basic block down.
+ bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
+ // Sink the instruction.
+ BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+ if (!OldPN->use_empty())
+ OldPN->replaceAllUsesWith(I1);
+ OldPN->eraseFromParent();
+
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ if (UpdateRE1)
+ RE1 = BB1->getInstList().rend();
+ if (UpdateRE2)
+ RE2 = BB2->getInstList().rend();
+ FirstNonPhiInBBEnd = I1;
+ NumSinkCommons++;
+ Changed = true;
+ }
+ return Changed;
+}
+
/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
/// (for now, restricted to a single instruction that's side effect free) from
@@ -1056,7 +1380,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
// Do not hoist the instruction if any of its operands are defined but not
// used in this BB. The transformation will prevent the operand from
// being sunk into the use block.
- for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
i != e; ++i) {
Instruction *OpI = dyn_cast<Instruction>(*i);
if (OpI && OpI->getParent() == BIParent &&
@@ -1112,7 +1436,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
// as well.
if (PHIs.empty())
return false;
-
+
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";);
@@ -1162,13 +1486,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
unsigned Size = 0;
-
+
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (Size > 10) return false; // Don't clone large BB's.
++Size;
-
+
// We can only support instructions that do not define values that are
// live outside of the current basic block.
for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
@@ -1176,7 +1500,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
Instruction *U = cast<Instruction>(*UI);
if (U->getParent() != BB || isa<PHINode>(U)) return false;
}
-
+
// Looks ok, continue checking.
}
@@ -1187,38 +1511,38 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
// outside of the block.
if (!PN || PN->getParent() != BB || !PN->hasOneUse())
return false;
-
+
// Degenerate case of a single entry PHI.
if (PN->getNumIncomingValues() == 1) {
FoldSingleEntryPHINodes(PN->getParent());
- return true;
+ return true;
}
// Now we know that this block has multiple preds and two succs.
if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
-
+
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
-
+
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
BasicBlock *PredBB = PN->getIncomingBlock(i);
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
-
+
if (RealDest == BB) continue; // Skip self loops.
// Skip if the predecessor's terminator is an indirect branch.
if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
-
+
// The dest block might have PHI nodes, other predecessors and other
// difficult cases. Instead of being smart about this, just insert a new
// block that jumps to the destination block, effectively splitting
@@ -1227,7 +1551,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
RealDest->getName()+".critedge",
RealDest->getParent(), RealDest);
BranchInst::Create(RealDest, EdgeBB);
-
+
// Update PHI nodes.
AddPredecessorToBlock(RealDest, EdgeBB, BB);
@@ -1244,7 +1568,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
// Clone the instruction.
Instruction *N = BBI->clone();
if (BBI->hasName()) N->setName(BBI->getName()+".c");
-
+
// Update operands due to translation.
for (User::op_iterator i = N->op_begin(), e = N->op_end();
i != e; ++i) {
@@ -1252,7 +1576,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
if (PI != TranslateMap.end())
*i = PI->second;
}
-
+
// Check for trivial simplification.
if (Value *V = SimplifyInstruction(N, TD)) {
TranslateMap[BBI] = V;
@@ -1283,7 +1607,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -1297,7 +1621,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
// Don't bother if the branch will be constant folded trivially.
isa<ConstantInt>(IfCond))
return false;
-
+
// Okay, we found that we can merge this two-entry phi node into a select.
// Doing so would require us to fold *all* two entry phi nodes in this block.
// At some point this becomes non-profitable (particularly if the target
@@ -1307,14 +1631,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
if (NumPhis > 2)
return false;
-
+
// Loop over the PHI's seeing if we can promote them all to select
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction*, 4> AggressiveInsts;
unsigned MaxCostVal0 = PHINodeFoldingThreshold,
MaxCostVal1 = PHINodeFoldingThreshold;
-
+
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
if (Value *V = SimplifyInstruction(PN, TD)) {
@@ -1322,19 +1646,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
PN->eraseFromParent();
continue;
}
-
+
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
MaxCostVal0) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
MaxCostVal1))
return false;
}
-
+
// If we folded the first phi, PN dangles at this point. Refresh it. If
// we ran out of PHIs then we simplified them all.
PN = dyn_cast<PHINode>(BB->begin());
if (PN == 0) return true;
-
+
// Don't fold i1 branches on PHIs which contain binary operators. These can
// often be turned into switches and other things.
if (PN->getType()->isIntegerTy(1) &&
@@ -1342,7 +1666,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
isa<BinaryOperator>(PN->getIncomingValue(1)) ||
isa<BinaryOperator>(IfCond)))
return false;
-
+
// If we all PHI nodes are promotable, check to make sure that all
// instructions in the predecessor blocks can be promoted as well. If
// not, we won't be able to get rid of the control flow, so it's not
@@ -1362,7 +1686,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
return false;
}
}
-
+
if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
IfBlock2 = 0;
} else {
@@ -1375,15 +1699,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
return false;
}
}
-
+
DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
-
+
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
Instruction *InsertPt = DomBlock->getTerminator();
IRBuilder<true, NoFolder> Builder(InsertPt);
-
+
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
if (IfBlock1)
@@ -1394,19 +1718,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
DomBlock->getInstList().splice(InsertPt,
IfBlock2->getInstList(), IfBlock2->begin(),
IfBlock2->getTerminator());
-
+
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
-
- SelectInst *NV =
+
+ SelectInst *NV =
cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
PN->replaceAllUsesWith(NV);
NV->takeName(PN);
PN->eraseFromParent();
}
-
+
// At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
// has been flattened. Change DomBlock to jump directly to our new block to
// avoid other simplifycfg's kicking in on the diamond.
@@ -1420,14 +1744,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
/// to two returning blocks, try to merge them together into one return,
/// introducing a select if the return values disagree.
-static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
IRBuilder<> &Builder) {
assert(BI->isConditional() && "Must be a conditional branch");
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
-
+
// Check to ensure both blocks are empty (just a return) or optionally empty
// with PHI nodes. If there are other instructions, merging would cause extra
// computation on one path or the other.
@@ -1447,12 +1771,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
EraseTerminatorInstAndDCECond(BI);
return true;
}
-
+
// Otherwise, figure out what the true and false return values are
// so we can insert a new select instruction.
Value *TrueValue = TrueRet->getReturnValue();
Value *FalseValue = FalseRet->getReturnValue();
-
+
// Unwrap any PHI nodes in the return blocks.
if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
if (TVPN->getParent() == TrueSucc)
@@ -1460,7 +1784,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
if (FVPN->getParent() == FalseSucc)
FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
-
+
// In order for this transformation to be safe, we must be able to
// unconditionally execute both operands to the return. This is
// normally the case, but we could have a potentially-trapping
@@ -1472,12 +1796,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
if (FCV->canTrap())
return false;
-
+
// Okay, we collected all the mapped values and checked them for sanity, and
// defined to really do this transformation. First, update the CFG.
TrueSucc->removePredecessor(BI->getParent());
FalseSucc->removePredecessor(BI->getParent());
-
+
// Insert select instructions where needed.
Value *BrCond = BI->getCondition();
if (TrueValue) {
@@ -1491,15 +1815,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
}
}
- Value *RI = !TrueValue ?
+ Value *RI = !TrueValue ?
Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
(void) RI;
-
+
DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
<< "\n " << *BI << "NewRet = " << *RI
<< "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
-
+
EraseTerminatorInstAndDCECond(BI);
return true;
@@ -1510,7 +1834,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
/// parameters and return true, or returns false if no or invalid metadata was
/// found.
static bool ExtractBranchMetadata(BranchInst *BI,
- APInt &ProbTrue, APInt &ProbFalse) {
+ uint64_t &ProbTrue, uint64_t &ProbFalse) {
assert(BI->isConditional() &&
"Looking for probabilities on unconditional branch?");
MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
@@ -1518,35 +1842,11 @@ static bool ExtractBranchMetadata(BranchInst *BI,
ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1));
ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2));
if (!CITrue || !CIFalse) return false;
- ProbTrue = CITrue->getValue();
- ProbFalse = CIFalse->getValue();
- assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 &&
- "Branch probability metadata must be 32-bit integers");
+ ProbTrue = CITrue->getValue().getZExtValue();
+ ProbFalse = CIFalse->getValue().getZExtValue();
return true;
}
-/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In
-/// the event of overflow, logically-shifts all four inputs right until the
-/// multiply fits.
-static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D,
- unsigned &BitsLost) {
- BitsLost = 0;
- bool Overflow = false;
- APInt Result = A.umul_ov(B, Overflow);
- if (Overflow) {
- APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A);
- do {
- B = B.lshr(1);
- ++BitsLost;
- } while (B.ugt(MaxB));
- A = A.lshr(BitsLost);
- C = C.lshr(BitsLost);
- D = D.lshr(BitsLost);
- Result = A * B;
- }
- return Result;
-}
-
/// checkCSEInPredecessor - Return true if the given instruction is available
/// in its predecessor block. If yes, the instruction will be removed.
///
@@ -1600,7 +1900,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
if (Cond == 0)
return false;
}
-
+
if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
@@ -1623,7 +1923,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
-
+
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
}
@@ -1631,13 +1931,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Only a single bonus inst is allowed.
if (&*FrontIt != Cond)
return false;
-
+
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = Cond; ++CondIt;
// Ingore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
-
+
if (&*CondIt != BI)
return false;
@@ -1649,7 +1949,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
if (CE->canTrap())
return false;
-
+
// Finally, don't infinitely unroll conditional loops.
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0;
@@ -1659,22 +1959,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *PredBlock = *PI;
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
-
+
// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
SmallVector<PHINode*, 4> PHIs;
if (PBI == 0 || PBI->isUnconditional() ||
- (BI->isConditional() &&
+ (BI->isConditional() &&
!SafeToMergeTerminators(BI, PBI)) ||
(!BI->isConditional() &&
!isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
continue;
-
+
// Determine if the two branches share a common destination.
- Instruction::BinaryOps Opc;
+ Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
bool InvertPredCond = false;
-
+
if (BI->isConditional()) {
if (PBI->getSuccessor(0) == TrueDest)
Opc = Instruction::Or;
@@ -1693,7 +1993,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ensure that any values used in the bonus instruction are also used
// by the terminator of the predecessor. This means that those values
- // must already have been resolved, so we won't be inhibiting the
+ // must already have been resolved, so we won't be inhibiting the
// out-of-order core by speculating them earlier.
if (BonusInst) {
// Collect the values used by the bonus inst
@@ -1707,47 +2007,47 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
-
+
// Walk up to four levels back up the use-def chain of the predecessor's
// terminator to see if all those values were used. The choice of four
// levels is arbitrary, to provide a compile-time-cost bound.
while (!Worklist.empty()) {
std::pair<Value*, unsigned> Pair = Worklist.back();
Worklist.pop_back();
-
+
if (Pair.second >= 4) continue;
UsedValues.erase(Pair.first);
if (UsedValues.empty()) break;
-
+
if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
- }
+ }
}
-
+
if (!UsedValues.empty()) return false;
}
DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
- IRBuilder<> Builder(PBI);
+ IRBuilder<> Builder(PBI);
// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
Value *NewCond = PBI->getCondition();
-
+
if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
CmpInst *CI = cast<CmpInst>(NewCond);
CI->setPredicate(CI->getInversePredicate());
} else {
- NewCond = Builder.CreateNot(NewCond,
+ NewCond = Builder.CreateNot(NewCond,
PBI->getCondition()->getName()+".not");
}
-
+
PBI->setCondition(NewCond);
PBI->swapSuccessors();
}
-
+
// If we have a bonus inst, clone it into the predecessor block.
Instruction *NewBonus = 0;
if (BonusInst) {
@@ -1756,7 +2056,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
NewBonus->takeName(BonusInst);
BonusInst->setName(BonusInst->getName()+".old");
}
-
+
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
Instruction *New = Cond->clone();
@@ -1764,21 +2064,60 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
PredBlock->getInstList().insert(PBI, New);
New->takeName(Cond);
Cond->setName(New->getName()+".old");
-
+
if (BI->isConditional()) {
- Instruction *NewCond =
+ Instruction *NewCond =
cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
New, "or.cond"));
PBI->setCondition(NewCond);
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ SmallVector<uint64_t, 8> NewWeights;
+
if (PBI->getSuccessor(0) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredTrueWeight * SuccFalseWeight);
+ }
AddPredecessorToBlock(TrueDest, PredBlock, BB);
PBI->setSuccessor(0, TrueDest);
}
if (PBI->getSuccessor(1) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredFalseWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
AddPredecessorToBlock(FalseDest, PredBlock, BB);
PBI->setSuccessor(1, FalseDest);
}
+ if (NewWeights.size() == 2) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, NULL);
} else {
// Update PHI nodes in the common successors.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
@@ -1806,7 +2145,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
// PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
// is false: PBI_Cond and BI_Value
- MergedCond =
+ MergedCond =
cast<Instruction>(Builder.CreateBinOp(Instruction::And,
PBI->getCondition(), New,
"and.cond"));
@@ -1814,7 +2153,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *NotCond =
cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
"not.cond"));
- MergedCond =
+ MergedCond =
cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
NotCond, MergedCond,
"or.cond"));
@@ -1833,95 +2172,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// TODO: If BB is reachable from all paths through PredBlock, then we
// could replace PBI's branch probabilities with BI's.
- // Merge probability data into PredBlock's branch.
- APInt A, B, C, D;
- if (PBI->isConditional() && BI->isConditional() &&
- ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) {
- // Given IR which does:
- // bbA:
- // br i1 %x, label %bbB, label %bbC
- // bbB:
- // br i1 %y, label %bbD, label %bbC
- // Let's call the probability that we take the edge from %bbA to %bbB
- // 'a', from %bbA to %bbC, 'b', from %bbB to %bbD 'c' and from %bbB to
- // %bbC probability 'd'.
- //
- // We transform the IR into:
- // bbA:
- // br i1 %z, label %bbD, label %bbC
- // where the probability of going to %bbD is (a*c) and going to bbC is
- // (b+a*d).
- //
- // Probabilities aren't stored as ratios directly. Using branch weights,
- // we get:
- // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D.
-
- // In the event of overflow, we want to drop the LSB of the input
- // probabilities.
- unsigned BitsLost;
-
- // Ignore overflow result on ProbTrue.
- APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost);
-
- APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- }
-
- APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- Tmp1 = Tmp1.lshr(BitsLost*2);
- }
-
- APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- Tmp1 = Tmp1.lshr(BitsLost*2);
- Tmp2 = Tmp2.lshr(BitsLost*2);
- }
-
- bool Overflow1 = false, Overflow2 = false;
- APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1);
- APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2);
-
- if (Overflow1 || Overflow2) {
- ProbTrue = ProbTrue.lshr(1);
- Tmp1 = Tmp1.lshr(1);
- Tmp2 = Tmp2.lshr(1);
- Tmp3 = Tmp3.lshr(1);
- Tmp4 = Tmp2 + Tmp3;
- ProbFalse = Tmp4 + Tmp1;
- }
-
- // The sum of branch weights must fit in 32-bits.
- if (ProbTrue.isNegative() && ProbFalse.isNegative()) {
- ProbTrue = ProbTrue.lshr(1);
- ProbFalse = ProbFalse.lshr(1);
- }
-
- if (ProbTrue != ProbFalse) {
- // Normalize the result.
- APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse);
- ProbTrue = ProbTrue.udiv(GCD);
- ProbFalse = ProbFalse.udiv(GCD);
-
- MDBuilder MDB(BI->getContext());
- MDNode *N = MDB.createBranchWeights(ProbTrue.getZExtValue(),
- ProbFalse.getZExtValue());
- PBI->setMetadata(LLVMContext::MD_prof, N);
- } else {
- PBI->setMetadata(LLVMContext::MD_prof, NULL);
- }
- } else {
- PBI->setMetadata(LLVMContext::MD_prof, NULL);
- }
-
// Copy any debug value intrinsics into the end of PredBlock.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (isa<DbgInfoIntrinsic>(*I))
I->clone()->insertBefore(PBI);
-
+
return true;
}
return false;
@@ -1936,7 +2191,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
BasicBlock *BB = BI->getParent();
// If this block ends with a branch instruction, and if there is a
- // predecessor that ends on a branch of the same condition, make
+ // predecessor that ends on a branch of the same condition, make
// this conditional branch redundant.
if (PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
@@ -1945,11 +2200,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (BB->getSinglePredecessor()) {
// Turn this into a branch on constant.
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
CondIsTrue));
return true; // Nuke the branch on constant.
}
-
+
// Otherwise, if there are multiple predecessors, insert a PHI that merges
// in the constant and simplify the block result. Subsequent passes of
// simplifycfg will thread the block.
@@ -1969,18 +2224,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
CondIsTrue), P);
} else {
NewPN->addIncoming(BI->getCondition(), P);
}
}
-
+
BI->setCondition(NewPN);
return true;
}
}
-
+
// If this is a conditional branch in an empty block, and if any
// predecessors is a conditional branch to one of our destinations,
// fold the conditions into logical ops and one cond br.
@@ -1991,11 +2246,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (&*BBI != BI)
return false;
-
+
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
if (CE->canTrap())
return false;
-
+
int PBIOp, BIOp;
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
PBIOp = BIOp = 0;
@@ -2007,31 +2262,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PBIOp = BIOp = 1;
else
return false;
-
+
// Check to make sure that the other destination of this branch
// isn't BB itself. If so, this is an infinite loop that will
// keep getting unwound.
if (PBI->getSuccessor(PBIOp) == BB)
return false;
-
- // Do not perform this transformation if it would require
+
+ // Do not perform this transformation if it would require
// insertion of a large number of select instructions. For targets
// without predication/cmovs, this is a big pessimization.
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
-
+
unsigned NumPhis = 0;
for (BasicBlock::iterator II = CommonDest->begin();
isa<PHINode>(II); ++II, ++NumPhis)
if (NumPhis > 2) // Disable this xform.
return false;
-
+
// Finally, if everything is ok, fold the branches to logical ops.
BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
-
+
DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
<< "AND: " << *BI->getParent());
-
-
+
+
// If OtherDest *is* BB, then BB is a basic block with a single conditional
// branch in it, where one edge (OtherDest) goes back to itself but the other
// exits. We don't *know* that the program avoids the infinite loop
@@ -2046,13 +2301,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
"infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
OtherDest = InfLoopBlock;
- }
-
+ }
+
DEBUG(dbgs() << *PBI->getParent()->getParent());
// BI may have other predecessors. Because of this, we leave
// it alone, but modify PBI.
-
+
// Make sure we get to CommonDest on True&True directions.
Value *PBICond = PBI->getCondition();
IRBuilder<true, NoFolder> Builder(PBI);
@@ -2065,16 +2320,43 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Merge the conditions.
Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
-
+
// Modify PBI to branch on the new condition to the new dests.
PBI->setCondition(Cond);
PBI->setSuccessor(0, CommonDest);
PBI->setSuccessor(1, OtherDest);
-
+
+ // Update branch weight for PBI.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ if (PredHasWeights && SuccHasWeights) {
+ uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight;
+ uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to CommonDest should be PredCommon * SuccTotal +
+ // PredOther * SuccCommon.
+ // The weight to OtherDest should be PredOther * SuccOther.
+ SmallVector<uint64_t, 2> NewWeights;
+ NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon);
+ NewWeights.push_back(PredOther * SuccOther);
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
// OtherDest may have phi nodes. If so, add an entry from PBI's
// block that are identical to the entries for BI's block.
AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
-
+
// We know that the CommonDest already had an edge from PBI to
// it. If it has PHIs though, the PHIs may have different
// entries for BB and PBI's BB. If so, insert a select to make
@@ -2092,10 +2374,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PN->setIncomingValue(PBBIdx, NV);
}
}
-
+
DEBUG(dbgs() << "INTO: " << *PBI->getParent());
DEBUG(dbgs() << *PBI->getParent()->getParent());
-
+
// This basic block is probably dead. We know it has at least
// one fewer predecessor.
return true;
@@ -2107,7 +2389,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
- BasicBlock *TrueBB, BasicBlock *FalseBB){
+ BasicBlock *TrueBB, BasicBlock *FalseBB,
+ uint32_t TrueWeight,
+ uint32_t FalseWeight){
// Remove any superfluous successor edges from the CFG.
// First, figure out which successors to preserve.
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
@@ -2136,10 +2420,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
// We were only looking for one successor, and it was present.
// Create an unconditional branch to it.
Builder.CreateBr(TrueBB);
- else
+ else {
// We found both of the successors we were looking for.
// Create a conditional branch sharing the condition of the select.
- Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ if (TrueWeight != FalseWeight)
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(OldTerm->getContext()).
+ createBranchWeights(TrueWeight, FalseWeight));
+ }
} else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
// Neither of the selected blocks were successors, so this
// terminator must be unreachable.
@@ -2176,8 +2465,23 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
+ // Get weight for TrueBB and FalseBB.
+ uint32_t TrueWeight = 0, FalseWeight = 0;
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal).
+ getSuccessorIndex()];
+ FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal).
+ getSuccessorIndex()];
+ }
+ }
+
// Perform the actual simplification.
- return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB,
+ TrueWeight, FalseWeight);
}
// SimplifyIndirectBrOnSelect - Replaces
@@ -2197,7 +2501,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
BasicBlock *FalseBB = FBA->getBasicBlock();
// Perform the actual simplification.
- return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB);
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
+ 0, 0);
}
/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
@@ -2214,11 +2519,11 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// br label %end
/// end:
/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
-///
+///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
- const TargetData *TD,
+ const DataLayout *TD,
IRBuilder<> &Builder) {
BasicBlock *BB = ICI->getParent();
@@ -2228,17 +2533,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
Value *V = ICI->getOperand(0);
ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
-
+
// The pattern we're looking for is where our only predecessor is a switch on
// 'V' and this block is the default case for the switch. In this case we can
// fold the compared value into the switch to simplify things.
BasicBlock *Pred = BB->getSinglePredecessor();
if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
-
+
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
if (SI->getCondition() != V)
return false;
-
+
// If BB is reachable on a non-default case, then we simply know the value of
// V in this block. Substitute it and constant fold the icmp instruction
// away.
@@ -2246,7 +2551,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
ConstantInt *VVal = SI->findCaseDest(BB);
assert(VVal && "Should have a unique destination value");
ICI->setOperand(0, VVal);
-
+
if (Value *V = SimplifyInstruction(ICI, TD)) {
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
@@ -2254,7 +2559,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
// BB is now empty, so it is likely to simplify away.
return SimplifyCFG(BB) | true;
}
-
+
// Ok, the block is reachable from the default dest. If the constant we're
// comparing exists in one of the other edges, then we can constant fold ICI
// and zap it.
@@ -2264,13 +2569,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
V = ConstantInt::getFalse(BB->getContext());
else
V = ConstantInt::getTrue(BB->getContext());
-
+
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
return SimplifyCFG(BB) | true;
}
-
+
// The use of the icmp has to be in the 'end' block, by the only PHI node in
// the block.
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
@@ -2296,8 +2601,23 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
// the switch to the merge point on the compared value.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
BB->getParent(), BB);
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Split weight for default case to case for "Cst".
+ Weights[0] = (Weights[0]+1) >> 1;
+ Weights.push_back(Weights[0]);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+ }
SI->addCase(Cst, NewBB);
-
+
// NewBB branches to the phi block, add the uncond branch and the phi entry.
Builder.SetInsertPoint(NewBB);
Builder.SetCurrentDebugLocation(SI->getDebugLoc());
@@ -2309,12 +2629,12 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
IRBuilder<> &Builder) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (Cond == 0) return false;
-
-
+
+
// Change br (X == 0 | X == 1), T, F into a switch instruction.
// If this is a bunch of seteq's or'd together, or if it's a bunch of
// 'setne's and'ed together, collect them.
@@ -2323,7 +2643,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
bool TrueWhenEqual = true;
Value *ExtraCase = 0;
unsigned UsedICmps = 0;
-
+
if (Cond->getOpcode() == Instruction::Or) {
CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
UsedICmps);
@@ -2332,7 +2652,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
UsedICmps);
TrueWhenEqual = false;
}
-
+
// If we didn't have a multiply compared value, fail.
if (CompVal == 0) return false;
@@ -2344,21 +2664,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
// instruction can't handle, remove them now.
array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
-
+
// If Extra was used, we require at least two switch values to do the
// transformation. A switch with one value is just an cond branch.
if (ExtraCase && Values.size() < 2) return false;
-
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
// Figure out which block is which destination.
BasicBlock *DefaultBB = BI->getSuccessor(1);
BasicBlock *EdgeBB = BI->getSuccessor(0);
if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
-
+
BasicBlock *BB = BI->getParent();
-
+
DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
<< " cases into SWITCH. BB is:\n" << *BB);
-
+
// If there are any extra values that couldn't be folded into the switch
// then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
@@ -2372,13 +2695,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
else
Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
-
+
OldTI->eraseFromParent();
-
+
// If there are PHI nodes in EdgeBB, then we need to add a new entry to them
// for the edge we just added.
AddPredecessorToBlock(EdgeBB, BB, NewBB);
-
+
DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
<< "\nEXTRABB = " << *BB);
BB = NewBB;
@@ -2387,19 +2710,19 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
Builder.SetInsertPoint(BI);
// Convert pointer to int before we switch.
if (CompVal->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without TargetData");
+ assert(TD && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
TD->getIntPtrType(CompVal->getContext()),
"magicptr");
}
-
+
// Create the new switch instruction now.
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
// Add all of the 'cases' to the switch instruction.
for (unsigned i = 0, e = Values.size(); i != e; ++i)
New->addCase(Values[i], EdgeBB);
-
+
// We added edges from PI to the EdgeBB. As such, if there were any
// PHI nodes in EdgeBB, they need entries to be added corresponding to
// the number of edges added.
@@ -2410,10 +2733,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
PN->addIncoming(InVal, BB);
}
-
+
// Erase the old branch instruction.
EraseTerminatorInstAndDCECond(BI);
-
+
DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
}
@@ -2467,7 +2790,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
-
+
// Find predecessors that end with branches.
SmallVector<BasicBlock*, 8> UncondBranchPreds;
SmallVector<BranchInst*, 8> CondBranchPreds;
@@ -2481,7 +2804,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
CondBranchPreds.push_back(BI);
}
}
-
+
// If we found some, do the transformation!
if (!UncondBranchPreds.empty() && DupRet) {
while (!UncondBranchPreds.empty()) {
@@ -2490,21 +2813,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
<< "INTO UNCOND BRANCH PRED: " << *Pred);
(void)FoldReturnIntoUncondBranch(RI, BB, Pred);
}
-
+
// If we eliminated all predecessors of the block, delete the block now.
if (pred_begin(BB) == pred_end(BB))
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
-
+
return true;
}
-
+
// Check out all of the conditional branches going to this return
// instruction. If any of them just select between returns, change the
// branch itself into a select/return pair.
while (!CondBranchPreds.empty()) {
BranchInst *BI = CondBranchPreds.pop_back_val();
-
+
// Check to see if the non-BB successor is also a return block.
if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
@@ -2516,9 +2839,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BasicBlock *BB = UI->getParent();
-
+
bool Changed = false;
-
+
// If there are any instructions immediately before the unreachable that can
// be removed, do so.
while (UI != BB->begin()) {
@@ -2558,11 +2881,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BBI->eraseFromParent();
Changed = true;
}
-
+
// If the unreachable instruction is the first in the block, take a gander
// at all of the predecessors of this instruction, and simplify them.
if (&BB->front() != UI) return Changed;
-
+
SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
TerminatorInst *TI = Preds[i]->getTerminator();
@@ -2615,7 +2938,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BasicBlock *MaxBlock = 0;
for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second.first > MaxPop ||
+ if (I->second.first > MaxPop ||
(I->second.first == MaxPop && MaxIndex > I->second.second)) {
MaxPop = I->second.first;
MaxIndex = I->second.second;
@@ -2627,13 +2950,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// edges to it.
SI->setDefaultDest(MaxBlock);
Changed = true;
-
+
// If MaxBlock has phinodes in it, remove MaxPop-1 entries from
// it.
if (isa<PHINode>(MaxBlock->begin()))
for (unsigned i = 0; i != MaxPop-1; ++i)
MaxBlock->removePredecessor(SI->getParent());
-
+
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i)
if (i.getCaseSuccessor() == MaxBlock) {
@@ -2648,7 +2971,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// place to note that the call does not throw though.
BranchInst *BI = Builder.CreateBr(II->getNormalDest());
II->removeFromParent(); // Take out of symbol table
-
+
// Insert the call now...
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
Builder.SetInsertPoint(BI);
@@ -2663,7 +2986,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
}
-
+
// If this block is now dead, remove it.
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock()) {
@@ -2706,9 +3029,28 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- Builder.CreateCondBr(
+ BranchInst *NewBI = Builder.CreateCondBr(
Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
+ // Update weight for the newly-created conditional branch.
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Combine all weights for the cases to be the true weight of NewBI.
+ // We assume that the sum of all weights for a Terminator can fit into 32
+ // bits.
+ uint32_t NewTrueWeight = 0;
+ for (unsigned I = 1, E = Weights.size(); I != E; ++I)
+ NewTrueWeight += (uint32_t)Weights[I];
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(NewTrueWeight,
+ (uint32_t)Weights[0]));
+ }
+ }
+
// Prune obsolete incoming values off the successor's PHI nodes.
for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
isa<PHINode>(BBI); ++BBI) {
@@ -2739,15 +3081,33 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
}
}
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeight = HasBranchWeights(SI);
+ if (HasWeight) {
+ GetBranchWeights(SI, Weights);
+ HasWeight = (Weights.size() == 1 + SI->getNumCases());
+ }
+
// Remove dead cases from the switch.
for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
assert(Case != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
+ if (HasWeight) {
+ std::swap(Weights[Case.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
+
// Prune unused values from PHI nodes.
Case.getCaseSuccessor()->removePredecessor(SI->getParent());
SI->removeCase(Case);
}
+ if (HasWeight) {
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(MDWeights));
+ }
return !DeadCases.empty();
}
@@ -2823,33 +3183,512 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
return Changed;
}
-bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
- // If this switch is too complex to want to look at, ignore it.
- if (!isValueEqualityComparison(SI))
+/// ValidLookupTableConstant - Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+static bool ValidLookupTableConstant(Constant *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return CE->isGEPWithNoNotionalOverIndexing();
+
+ return isa<ConstantFP>(C) ||
+ isa<ConstantInt>(C) ||
+ isa<ConstantPointerNull>(C) ||
+ isa<GlobalValue>(C) ||
+ isa<UndefValue>(C);
+}
+
+/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up
+/// its constant value in ConstantPool, returning 0 if it's not there.
+static Constant *LookupConstant(Value *V,
+ const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C;
+ return ConstantPool.lookup(V);
+}
+
+/// ConstantFold - Try to fold instruction I into a constant. This works for
+/// simple instructions such as binary operations where both operands are
+/// constant or can be replaced by constants from the ConstantPool. Returns the
+/// resulting constant on success, 0 otherwise.
+static Constant *ConstantFold(Instruction *I,
+ const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ Constant *A = LookupConstant(BO->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ Constant *B = LookupConstant(BO->getOperand(1), ConstantPool);
+ if (!B)
+ return 0;
+ return ConstantExpr::get(BO->getOpcode(), A, B);
+ }
+
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ Constant *B = LookupConstant(I->getOperand(1), ConstantPool);
+ if (!B)
+ return 0;
+ return ConstantExpr::getCompare(Cmp->getPredicate(), A, B);
+ }
+
+ if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+ Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
+ if (!A)
+ return 0;
+ if (A->isAllOnesValue())
+ return LookupConstant(Select->getTrueValue(), ConstantPool);
+ if (A->isNullValue())
+ return LookupConstant(Select->getFalseValue(), ConstantPool);
+ return 0;
+ }
+
+ if (CastInst *Cast = dyn_cast<CastInst>(I)) {
+ Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy());
+ }
+
+ return 0;
+}
+
+/// GetCaseResults - Try to determine the resulting constant values in phi nodes
+/// at the common destination basic block, *CommonDest, for one of the case
+/// destionations CaseDest corresponding to value CaseVal (0 for the default
+/// case), of a switch instruction SI.
+static bool GetCaseResults(SwitchInst *SI,
+ ConstantInt *CaseVal,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty except for some side-effect free instructions through
+ // which we can constant-propagate the CaseVal, continue to its successor.
+ SmallDenseMap<Value*, Constant*> ConstantPool;
+ ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
+ for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
+ ++I) {
+ if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
+ // If the terminator is a simple branch, continue to the next block.
+ if (T->getNumSuccessors() != 1)
+ return false;
+ Pred = CaseDest;
+ CaseDest = T->getSuccessor(0);
+ } else if (isa<DbgInfoIntrinsic>(I)) {
+ // Skip debug intrinsic.
+ continue;
+ } else if (Constant *C = ConstantFold(I, ConstantPool)) {
+ // Instruction is side-effect free and constant.
+ ConstantPool.insert(std::make_pair(I, C));
+ } else {
+ break;
+ }
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ BasicBlock::iterator I = (*CommonDest)->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(Pred);
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx),
+ ConstantPool);
+ if (!ConstVal)
+ return false;
+
+ // Note: If the constant comes from constant-propagating the case value
+ // through the CaseDest basic block, it will be safe to remove the
+ // instructions in that block. They cannot be used (except in the phi nodes
+ // we visit) outside CaseDest, because that block does not dominate its
+ // successor. If it did, we would not be in this phi node.
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal))
+ return false;
+
+ Res.push_back(std::make_pair(PHI, ConstVal));
+ }
+
+ return true;
+}
+
+namespace {
+ /// SwitchLookupTable - This class represents a lookup table that can be used
+ /// to replace a switch.
+ class SwitchLookupTable {
+ public:
+ /// SwitchLookupTable - Create a lookup table to use as a switch replacement
+ /// with the contents of Values, using DefaultValue to fill any holes in the
+ /// table.
+ SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD);
+
+ /// BuildLookup - Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// WouldFitInRegister - Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType);
+
+ private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+ };
+}
+
+SwitchLookupTable::SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD) {
+ assert(Values.size() && "Can't build lookup table without values!");
+ assert(TableSize >= Values.size() && "Can't fit values in table!");
+
+ // If all values in the table are equal, this is that value.
+ SingleValue = Values.begin()->second;
+
+ // Build up the table contents.
+ SmallVector<Constant*, 64> TableContents(TableSize);
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Values[I].first;
+ Constant *CaseRes = Values[I].second;
+ assert(CaseRes->getType() == DefaultValue->getType());
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
+ .getLimitedValue();
+ TableContents[Idx] = CaseRes;
+
+ if (CaseRes != SingleValue)
+ SingleValue = 0;
+ }
+
+ // Fill in any holes in the table with the default result.
+ if (Values.size() < TableSize) {
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ if (!TableContents[I])
+ TableContents[I] = DefaultValue;
+ }
+
+ if (DefaultValue != SingleValue)
+ SingleValue = 0;
+ }
+
+ // If each element in the table contains the same value, we only need to store
+ // that single value.
+ if (SingleValue) {
+ Kind = SingleValueKind;
+ return;
+ }
+
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
+ IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ // Insert values into the bitmap. Undef values are set to zero.
+ if (!isa<UndefValue>(TableContents[I - 1])) {
+ ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
+ TableInt |= Val->getValue().zext(TableInt.getBitWidth());
+ }
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
+ }
+
+ // Store the table in an array.
+ ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
+ Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+ Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+ GlobalVariable::PrivateLinkage,
+ Initializer,
+ "switch.table");
+ Array->setUnnamedAddr(true);
+ Kind = ArrayKind;
+}
+
+Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+ switch (Kind) {
+ case SingleValueKind:
+ return SingleValue;
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(ShiftAmt,
+ ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
+ "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy,
+ "switch.masked");
+ }
+ case ArrayKind: {
+ Value *GEPIndices[] = { Builder.getInt32(0), Index };
+ Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
+ "switch.gep");
+ return Builder.CreateLoad(GEP, "switch.load");
+ }
+ }
+ llvm_unreachable("Unknown lookup table kind!");
+}
+
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType) {
+ if (!TD)
+ return false;
+ const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
+ return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
+
+ // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
+ if (TableSize >= UINT_MAX/IT->getBitWidth())
+ return false;
+ return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
+/// ShouldBuildLookupTable - Determine whether a lookup table should be built
+/// for this switch, based on the number of caes, size of the table and the
+/// types of the results.
+static bool ShouldBuildLookupTable(SwitchInst *SI,
+ uint64_t TableSize,
+ const DataLayout *TD,
+ const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+ // The table density should be at least 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+ if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
+ return false; // TableSize overflowed, or mul below might overflow.
+ if (SI->getNumCases() * 10 >= TableSize * 4)
+ return true;
+
+ // If each table would fit in a register, we should build it anyway.
+ for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
+ E = ResultTypes.end(); I != E; ++I) {
+ if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
+ return false;
+ }
+ return true;
+}
+
+/// SwitchToLookupTable - If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with different constant values,
+/// replace the switch with lookup tables.
+static bool SwitchToLookupTable(SwitchInst *SI,
+ IRBuilder<> &Builder,
+ const DataLayout* TD,
+ const TargetTransformInfo *TTI) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ // Only build lookup table when we have a target that supports it.
+ if (!TTI || !TTI->getScalarTargetTransformInfo() ||
+ !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
return false;
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+ // Ignore the switch if the number of cases is too small.
+ // This is similar to the check when building jump tables in
+ // SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Determine the best cut-off.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // Figure out the corresponding result for each case value and phi node in the
+ // common destination, as well as the the min and max case values.
+ assert(SI->case_begin() != SI->case_end());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI.getCaseValue();
+ ConstantInt *MaxCaseVal = CI.getCaseValue();
+
+ BasicBlock *CommonDest = 0;
+ typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
+ SmallDenseMap<PHINode*, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode*, Constant*> DefaultResults;
+ SmallDenseMap<PHINode*, Type*> ResultTypes;
+ SmallVector<PHINode*, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+ ResultsTy Results;
+ if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
+ Results))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
+ if (!ResultLists.count(I->first))
+ PHIs.push_back(I->first);
+ ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+ }
+ }
+
+ // Get the resulting values for the default case.
+ SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+ if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList))
+ return false;
+ for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
+ PHINode *PHI = DefaultResultsList[I].first;
+ Constant *Result = DefaultResultsList[I].second;
+ DefaultResults[PHI] = Result;
+ ResultTypes[PHI] = Result->getType();
+ }
+
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
+ return false;
+
+ // Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
+ BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Check whether the condition value is within the case range, and branch to
+ // the new BB.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+ bool ReturnedEarly = false;
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
+ DefaultResults[PHI], TD);
+
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
+
+ // If the result is used to return immediately from the function, we want to
+ // do that right here.
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) &&
+ *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) {
+ Builder.CreateRet(Result);
+ ReturnedEarly = true;
+ break;
+ }
+
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ if (!ReturnedEarly)
+ Builder.CreateBr(CommonDest);
+
+ // Remove the switch.
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == SI->getDefaultDest()) continue;
+ Succ->removePredecessor(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ ++NumLookupTables;
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB) | true;
+ if (isValueEqualityComparison(SI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+ return SimplifyCFG(BB) | true;
- Value *Cond = SI->getCondition();
- if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
- if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB) | true;
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return SimplifyCFG(BB) | true;
- // If the block only contains the switch, see if we can fold the block
- // away into any preds.
- BasicBlock::iterator BBI = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (SI == &*BBI)
- if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB) | true;
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI, Builder))
+ return SimplifyCFG(BB) | true;
+ }
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
@@ -2862,13 +3701,16 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
+ if (SwitchToLookupTable(SI, Builder, TD, TTI))
+ return SimplifyCFG(BB) | true;
+
return false;
}
bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
BasicBlock *BB = IBI->getParent();
bool Changed = false;
-
+
// Eliminate redundant destinations.
SmallPtrSet<Value *, 8> Succs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
@@ -2879,7 +3721,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
--i; --e;
Changed = true;
}
- }
+ }
if (IBI->getNumDestinations() == 0) {
// If the indirectbr has no successors, change it to unreachable.
@@ -2887,14 +3729,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
EraseTerminatorInstAndDCECond(IBI);
return true;
}
-
+
if (IBI->getNumDestinations() == 1) {
// If the indirectbr has one successor, change it to a direct branch.
BranchInst::Create(IBI->getDestination(0), IBI);
EraseTerminatorInstAndDCECond(IBI);
return true;
}
-
+
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
return SimplifyCFG(BB) | true;
@@ -2904,13 +3746,16 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
-
+
+ if (SinkCommon && SinkThenElseCodeToEnd(BI))
+ return true;
+
// If the Terminator is the only non-phi instruction, simplify the block.
BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
-
+
// If the only instruction in the block is a seteq/setne comparison
// against a constant, try to simplify the block.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
@@ -2921,7 +3766,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
return true;
}
-
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
@@ -2934,7 +3779,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
-
+
// Conditional branch
if (isValueEqualityComparison(BI)) {
// If we only have one predecessor, and if it is a branch on this value,
@@ -2943,7 +3788,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
return SimplifyCFG(BB) | true;
-
+
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
BasicBlock::iterator I = BB->begin();
@@ -2962,17 +3807,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return SimplifyCFG(BB) | true;
}
}
-
+
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
if (SimplifyBranchOnICmpChain(BI, TD, Builder))
return true;
-
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI))
return SimplifyCFG(BB) | true;
-
+
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
// there is any identical code in the "then" and "else" blocks. If so, we
@@ -2999,14 +3844,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
return SimplifyCFG(BB) | true;
}
-
+
// If this is a branch on a phi node in the current block, thread control
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, TD))
return SimplifyCFG(BB) | true;
-
+
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
@@ -3023,11 +3868,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
if (!C)
return false;
- if (!I->hasOneUse()) // Only look at single-use instructions, for compile time
+ if (I->use_empty())
return false;
if (C->isNullValue()) {
- Instruction *Use = I->use_back();
+ // Only look at the first use, avoid hurting compile time with long uselists
+ User *Use = *I->use_begin();
// Now make sure that there are no instructions in between that can alter
// control flow (eg. calls)
@@ -3114,7 +3960,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
//
if (MergeBlockIntoPredecessor(BB))
return true;
-
+
IRBuilder<> Builder(BB);
// If there is a trivial two-entry PHI node in this basic block, and we can
@@ -3152,6 +3998,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made.
///
-bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
- return SimplifyCFGOpt(TD).run(BB);
+bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD,
+ const TargetTransformInfo *TTI) {
+ return SimplifyCFGOpt(TD, TTI).run(BB);
}
OpenPOWER on IntegriCloud