summary | refs | log | tree | commit | diff | stats
path: root/lib/Transforms/Scalar/LoopStrengthReduce.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp 264
1 files changed, 162 insertions, 102 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b085b00..b14a713 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1308,8 +1308,8 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
case LSRUse::Special:
- // Only handle -1 scales, or no scale.
- return AM.Scale == 0 || AM.Scale == -1;
+ // Special case Basic to handle -1 scales.
+ return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
@@ -1439,7 +1439,41 @@ struct IVInc {
// IVChain - The list of IV increments in program order.
// We typically add the head of a chain without finding subsequent links.
-typedef SmallVector<IVInc,1> IVChain;
+struct IVChain {
+ SmallVector<IVInc,1> Incs;
+ const SCEV *ExprBase;
+
+ IVChain() : ExprBase(0) {}
+
+ IVChain(const IVInc &Head, const SCEV *Base)
+ : Incs(1, Head), ExprBase(Base) {}
+
+ typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
+
+ // begin - return the first increment in the chain.
+ const_iterator begin() const {
+ assert(!Incs.empty());
+ return llvm::next(Incs.begin());
+ }
+ const_iterator end() const {
+ return Incs.end();
+ }
+
+ // hasIncs - Returns true if this chain contains any increments.
+ bool hasIncs() const { return Incs.size() >= 2; }
+
+ // add - Add an IVInc to the end of this chain.
+ void add(const IVInc &X) { Incs.push_back(X); }
+
+ // tailUserInst - Returns the last UserInst in the chain.
+ Instruction *tailUserInst() const { return Incs.back().UserInst; }
+
+ // isProfitableIncrement - Returns true if IncExpr can be profitably added to
+ // this chain.
+ bool isProfitableIncrement(const SCEV *OperExpr,
+ const SCEV *IncExpr,
+ ScalarEvolution&);
+};
/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
/// Distinguish between FarUsers that definitely cross IV increments and
@@ -2160,7 +2194,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
- // can skip the rest of the formulae and procede to the next LSRUse.
+ // can skip the rest of the formulae and proceed to the next LSRUse.
break;
}
}
@@ -2319,41 +2353,23 @@ static const SCEV *getExprBase(const SCEV *S) {
/// increment will be an offset relative to the same base. We allow such offsets
/// to potentially be used as chain increment as long as it's not obviously
/// expensive to expand using real instructions.
-static const SCEV *
-getProfitableChainIncrement(Value *NextIV, Value *PrevIV,
- const IVChain &Chain, Loop *L,
- ScalarEvolution &SE, const TargetLowering *TLI) {
- // Prune the solution space aggressively by checking that both IV operands
- // are expressions that operate on the same unscaled SCEVUnknown. This
- // "base" will be canceled by the subsequent getMinusSCEV call. Checking first
- // avoids creating extra SCEV expressions.
- const SCEV *OperExpr = SE.getSCEV(NextIV);
- const SCEV *PrevExpr = SE.getSCEV(PrevIV);
- if (getExprBase(OperExpr) != getExprBase(PrevExpr) && !StressIVChain)
- return 0;
-
- const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
- if (!SE.isLoopInvariant(IncExpr, L))
- return 0;
-
- // We are not able to expand an increment unless it is loop invariant,
- // however, the following checks are purely for profitability.
+bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
+ const SCEV *IncExpr,
+ ScalarEvolution &SE) {
+ // Aggressively form chains when -stress-ivchain.
if (StressIVChain)
- return IncExpr;
+ return true;
// Do not replace a constant offset from IV head with a nonconstant IV
// increment.
if (!isa<SCEVConstant>(IncExpr)) {
- const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Chain[0].IVOperand));
+ const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
return 0;
}
SmallPtrSet<const SCEV*, 8> Processed;
- if (isHighCostExpansion(IncExpr, Processed, SE))
- return 0;
-
- return IncExpr;
+ return !isHighCostExpansion(IncExpr, Processed, SE);
}
/// Return true if the number of registers needed for the chain is estimated to
@@ -2372,18 +2388,18 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
if (StressIVChain)
return true;
- if (Chain.size() <= 2)
+ if (!Chain.hasIncs())
return false;
if (!Users.empty()) {
- DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " users:\n";
+ DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
E = Users.end(); I != E; ++I) {
dbgs() << " " << **I << "\n";
});
return false;
}
- assert(!Chain.empty() && "empty IV chains are not allowed");
+ assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
// The chain itself may require a register, so initialize cost to 1.
int cost = 1;
@@ -2391,15 +2407,15 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
// A complete chain likely eliminates the need for keeping the original IV in
// a register. LSR does not currently know how to form a complete chain unless
// the header phi already exists.
- if (isa<PHINode>(Chain.back().UserInst)
- && SE.getSCEV(Chain.back().UserInst) == Chain[0].IncExpr) {
+ if (isa<PHINode>(Chain.tailUserInst())
+ && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
const SCEV *LastIncExpr = 0;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
- for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+ for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
I != E; ++I) {
if (I->IncExpr->isZero())
@@ -2435,7 +2451,8 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
// the stride.
cost -= NumReusedIncrements;
- DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " Cost: " << cost << "\n");
+ DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
+ << "\n");
return cost < 0;
}
@@ -2446,25 +2463,39 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec) {
// When IVs are used as types of varying widths, they are generally converted
// to a wider type with some uses remaining narrow under a (free) trunc.
- Value *NextIV = getWideOperand(IVOper);
+ Value *const NextIV = getWideOperand(IVOper);
+ const SCEV *const OperExpr = SE.getSCEV(NextIV);
+ const SCEV *const OperExprBase = getExprBase(OperExpr);
// Visit all existing chains. Check if its IVOper can be computed as a
// profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
const SCEV *LastIncExpr = 0;
for (; ChainIdx < NChains; ++ChainIdx) {
- Value *PrevIV = getWideOperand(IVChainVec[ChainIdx].back().IVOperand);
+ IVChain &Chain = IVChainVec[ChainIdx];
+
+ // Prune the solution space aggressively by checking that both IV operands
+ // are expressions that operate on the same unscaled SCEVUnknown. This
+ // "base" will be canceled by the subsequent getMinusSCEV call. Checking
+ // first avoids creating extra SCEV expressions.
+ if (!StressIVChain && Chain.ExprBase != OperExprBase)
+ continue;
+
+ Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
if (!isCompatibleIVType(PrevIV, NextIV))
continue;
// A phi node terminates a chain.
- if (isa<PHINode>(UserInst)
- && isa<PHINode>(IVChainVec[ChainIdx].back().UserInst))
+ if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
+ continue;
+
+ // The increment must be loop-invariant so it can be kept in a register.
+ const SCEV *PrevExpr = SE.getSCEV(PrevIV);
+ const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
+ if (!SE.isLoopInvariant(IncExpr, L))
continue;
- if (const SCEV *IncExpr =
- getProfitableChainIncrement(NextIV, PrevIV, IVChainVec[ChainIdx],
- L, SE, TLI)) {
+ if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
LastIncExpr = IncExpr;
break;
}
@@ -2478,24 +2509,24 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
DEBUG(dbgs() << "IV Chain Limit\n");
return;
}
- LastIncExpr = SE.getSCEV(NextIV);
+ LastIncExpr = OperExpr;
// IVUsers may have skipped over sign/zero extensions. We don't currently
// attempt to form chains involving extensions unless they can be hoisted
// into this loop's AddRec.
if (!isa<SCEVAddRecExpr>(LastIncExpr))
return;
++NChains;
- IVChainVec.resize(NChains);
+ IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
+ OperExprBase));
ChainUsersVec.resize(NChains);
- DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr
- << "\n");
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
+ << ") IV=" << *LastIncExpr << "\n");
+ } else {
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
+ << ") IV+" << *LastIncExpr << "\n");
+ // Add this IV user to the end of the chain.
+ IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
- else
- DEBUG(dbgs() << "IV Inc: (" << *UserInst << ") IV+" << *LastIncExpr
- << "\n");
-
- // Add this IV user to the end of the chain.
- IVChainVec[ChainIdx].push_back(IVInc(UserInst, IVOper, LastIncExpr));
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
// This chain's NearUsers become FarUsers.
@@ -2551,6 +2582,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() {
+ DEBUG(dbgs() << "Collecting IV Chains.\n");
SmallVector<ChainUsers, 8> ChainUsersVec;
SmallVector<BasicBlock *,8> LatchPath;
@@ -2622,10 +2654,10 @@ void LSRInstance::CollectChains() {
}
void LSRInstance::FinalizeChain(IVChain &Chain) {
- assert(!Chain.empty() && "empty IV chains are not allowed");
- DEBUG(dbgs() << "Final Chain: " << *Chain[0].UserInst << "\n");
+ assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+ DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
- for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+ for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
I != E; ++I) {
DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
User::op_iterator UseI =
@@ -2659,7 +2691,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
// by LSR.
- const IVInc &Head = Chain[0];
+ const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
@@ -2691,7 +2723,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
const SCEV *LeftOverExpr = 0;
- for (IVChain::const_iterator IncI = llvm::next(Chain.begin()),
+ for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
Instruction *InsertPt = IncI->UserInst;
@@ -2736,7 +2768,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
}
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
- if (isa<PHINode>(Chain.back().UserInst)) {
+ if (isa<PHINode>(Chain.tailUserInst())) {
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
if (!isCompatibleIVType(Phi, IVSrc))
@@ -2804,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2974,42 +3006,64 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
-static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
- SmallVectorImpl<const SCEV *> &Ops,
- const Loop *L,
- ScalarEvolution &SE) {
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE,
+ unsigned Depth = 0) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return S;
+
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- CollectSubexprs(*I, C, Ops, L, SE);
- return;
+ I != E; ++I) {
+ const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ }
+ return NULL;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
- if (!AR->getStart()->isZero()) {
- CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
- AR->getStepRecurrence(SE),
- AR->getLoop(),
- //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
- SCEV::FlagAnyWrap),
- C, Ops, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, L, SE);
- return;
+ if (AR->getStart()->isZero())
+ return S;
+
+ const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+ C, Ops, L, SE, Depth+1);
+ // Split the non-zero AddRec unless it is part of a nested recurrence that
+ // does not pertain to this loop.
+ if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ Remainder = NULL;
+ }
+ if (Remainder != AR->getStart()) {
+ if (!Remainder)
+ Remainder = SE.getConstant(AR->getType(), 0);
+ return SE.getAddRecExpr(Remainder,
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
- if (Mul->getNumOperands() == 2)
- if (const SCEVConstant *Op0 =
- dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
- CollectSubexprs(Mul->getOperand(1),
- C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, L, SE);
- return;
- }
+ if (Mul->getNumOperands() != 2)
+ return S;
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+ const SCEV *Remainder =
+ CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(SE.getMulExpr(C, Remainder));
+ return NULL;
+ }
}
-
- // Otherwise use the value itself, optionally with a scale applied.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ return S;
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -3024,7 +3078,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
if (AddOps.size() == 1) continue;
@@ -4236,13 +4292,6 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
- // Flush the operand list to suppress SCEVExpander hoisting.
- if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
- }
-
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.AM.Scale != 0) {
@@ -4264,23 +4313,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
+
+ // Flush the operand list to suppress SCEVExpander hoisting address modes.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
SE.getConstant(ScaledS->getType(), F.AM.Scale));
Ops.push_back(ScaledS);
-
- // Flush the operand list to suppress SCEVExpander hoisting.
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
}
}
// Expand the GV portion.
if (F.AM.BaseGV) {
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+ }
- // Flush the operand list to suppress SCEVExpander hoisting.
+ // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+ // unfolded offsets. LSR assumes they both live next to their uses.
+ if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
@@ -4485,7 +4545,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// Mark phi nodes that terminate chains so the expander tries to reuse them.
for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
- if (PHINode *PN = dyn_cast<PHINode>(ChainI->back().UserInst))
+ if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
Rewriter.setChainedPhi(PN);
}
OpenPOWER on IntegriCloud