diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 151 |
1 files changed, 74 insertions, 77 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 944f409..7579748 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -143,10 +143,10 @@ namespace { /// inside the loop then try to eliminate the cast opeation. void OptimizeShadowIV(Loop *L); - /// OptimizeSMax - Rewrite the loop's terminating condition - /// if it uses an smax computation. - ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse); + /// OptimizeMax - Rewrite the loop's terminating condition + /// if it uses a max computation. + ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEVHandle *&CondStride); @@ -336,13 +336,6 @@ namespace { /// EmittedBase. Value *OperandValToReplace; - /// isSigned - The stride (and thus also the Base) of this use may be in - /// a narrower type than the use itself (OperandValToReplace->getType()). - /// When this is the case, the isSigned field indicates whether the - /// IV expression should be signed-extended instead of zero-extended to - /// fit the type of the use. - bool isSigned; - /// Imm - The immediate value that should be added to the base immediately /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that @@ -363,7 +356,6 @@ namespace { BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - isSigned(IVSU.isSigned()), Imm(SE->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} @@ -428,11 +420,6 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); } - if (isSigned) - NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty); - else - NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty); - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); } @@ -592,7 +579,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, if (Val->isLoopInvariant(L)) return; // Nothing to do. if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) @@ -613,7 +600,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, SCEVHandle Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } else { @@ -633,7 +620,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, bool isAddress, Loop *L, ScalarEvolution *SE) { if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { @@ -660,7 +647,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); if (Start != SARE->getStart()) { - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } @@ -717,7 +704,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are /// added together. This is used to reassociate common addition subexprs /// together for maximal sharing when rewriting bases. -static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, +static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs, SCEVHandle Expr, ScalarEvolution *SE) { if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { @@ -729,7 +716,7 @@ static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, SubExprs.push_back(Expr); } else { // Compute the addrec with zero as its base. - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); @@ -783,9 +770,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. - std::vector<SCEVHandle> UniqueSubExprs; + SmallVector<SCEVHandle, 16> UniqueSubExprs; - std::vector<SCEVHandle> SubExprs; + SmallVector<SCEVHandle, 16> SubExprs; unsigned NumUsesInsideLoop = 0; for (unsigned i = 0; i != NumUses; ++i) { // If the user is outside the loop, just ignore it for base computation. @@ -1129,11 +1116,11 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) { return SC->getValue()->getValue().isNegative(); } -// CollectIVUsers - Transform our list of users and offsets to a bit more -// complex table. In this new vector, each 'BasedUser' contains 'Base', the base -// of the strided accesses, as well as the old information from Uses. We -// progressively move information from the Base field to the Imm field, until -// we eventually have the full access expression to rewrite the use. +/// CollectIVUsers - Transform our list of users and offsets to a bit more +/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base +/// of the strided accesses, as well as the old information from Uses. We +/// progressively move information from the Base field to the Imm field, until +/// we eventually have the full access expression to rewrite the use. SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, @@ -2008,15 +1995,15 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (!isa<PointerType>(NewCmpTy)) NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { - ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); + Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits ? SE->getMulExpr(CondUse->getOffset(), - SE->getConstant(ConstantInt::get(CmpTy, Scale))) - : SE->getConstant(ConstantInt::get(NewCmpIntTy, + SE->getConstant(CmpTy, Scale)) + : SE->getConstant(NewCmpIntTy, cast<SCEVConstant>(CondUse->getOffset())->getValue() - ->getSExtValue()*Scale)); + ->getSExtValue()*Scale); break; } } @@ -2047,7 +2034,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, OldCond->replaceAllUsesWith(Cond); OldCond->eraseFromParent(); - IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false); + IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS); CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); CondStride = NewStride; ++NumEliminated; @@ -2057,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, return Cond; } -/// OptimizeSMax - Rewrite the loop's terminating condition if it uses -/// an smax computation. +/// OptimizeMax - Rewrite the loop's terminating condition if it uses +/// a max computation. /// /// This is a narrow solution to a specific, but acute, problem. For loops /// like this: @@ -2068,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// p[i] = 0.0; /// } while (++i < n); /// -/// where the comparison is signed, the trip count isn't just 'n', because -/// 'n' could be negative. And unfortunately this can come up even for loops -/// where the user didn't use a C do-while loop. For example, seemingly -/// well-behaved top-test loops will commonly be lowered like this: +/// the trip count isn't just 'n', because 'n' might not be positive. And +/// unfortunately this can come up even for loops where the user didn't use +/// a C do-while loop. For example, seemingly well-behaved top-test loops +/// will commonly be lowered like this: // /// if (n > 0) { /// i = 0; @@ -2084,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// test in such a way that indvars can't find it. /// /// When indvars can't find the if test in loops like this, it creates a -/// signed-max expression, which allows it to give the loop a canonical +/// max expression, which allows it to give the loop a canonical /// induction variable: /// /// i = 0; -/// smax = n < 1 ? 1 : n; +/// max = n < 1 ? 1 : n; /// do { /// p[i] = 0.0; -/// } while (++i != smax); +/// } while (++i != max); /// /// Canonical induction variables are necessary because the loop passes /// are designed around them. The most obvious example of this is the @@ -2107,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting /// the instructions for the maximum computation. /// -ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse) { +ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse) { // Check that the loop matches the pattern we're looking for. if (Cond->getPredicate() != CmpInst::ICMP_EQ && Cond->getPredicate() != CmpInst::ICMP_NE) @@ -2126,12 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. - const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount); - if (!SMax || SMax != SE->getSCEV(Sel)) return Cond; + if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) + return Cond; + const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); + if (Max != SE->getSCEV(Sel)) return Cond; + + // To handle a max with more than two operands, this optimization would + // require additional checking and setup. + if (Max->getNumOperands() != 2) + return Cond; - SCEVHandle SMaxLHS = SMax->getOperand(0); - SCEVHandle SMaxRHS = SMax->getOperand(1); - if (!SMaxLHS || SMaxLHS != One) return Cond; + SCEVHandle MaxLHS = Max->getOperand(0); + SCEVHandle MaxRHS = Max->getOperand(1); + if (!MaxLHS || MaxLHS != One) return Cond; // Check the relevant induction variable for conformance to // the pattern. @@ -2148,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS) + if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); - else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS) + else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); if (!NewRHS) return Cond; + // Determine the new comparison opcode. It may be signed or unsigned, + // and the original comparison may be either equality or inequality. + CmpInst::Predicate Pred = + isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + if (Cond->getPredicate() == CmpInst::ICMP_EQ) + Pred = CmpInst::getInversePredicate(Pred); + // Ok, everything looks ok to change the condition into an SLT or SGE and // delete the max calculation. ICmpInst *NewCond = - new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ? - CmpInst::ICMP_SLT : - CmpInst::ICMP_SGE, - Cond->getOperand(0), NewRHS, "scmp", Cond); + new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond); // Delete the max calculation instructions. Cond->replaceAllUsesWith(NewCond); @@ -2242,7 +2240,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -2266,7 +2264,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); /* create new increment. '++d' in above example. */ - ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? Instruction::FAdd : Instruction::FSub, @@ -2284,9 +2282,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { } } -// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar -// uses in the loop, look to see if we can eliminate some, in favor of using -// common indvars for the different uses. +/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar +/// uses in the loop, look to see if we can eliminate some, in favor of using +/// common indvars for the different uses. void LoopStrengthReduce::OptimizeIndvars(Loop *L) { // TODO: implement optzns here. @@ -2301,11 +2299,11 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // induction variable, to allow coalescing the live ranges for the IV into // one register value. BasicBlock *LatchBlock = L->getLoopLatch(); - BasicBlock *ExitBlock = L->getExitingBlock(); - if (!ExitBlock) + BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) // Multiple exits, just look at the exit in the latch block if there is one. - ExitBlock = LatchBlock; - BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator()); + ExitingBlock = LatchBlock; + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!TermBr) return; if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) @@ -2318,7 +2316,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { if (!FindIVUserForCond(Cond, CondUse, CondStride)) return; // setcc doesn't use the IV. - if (ExitBlock != LatchBlock) { + if (ExitingBlock != LatchBlock) { if (!Cond->hasOneUse()) // See below, we don't want the condition to be cloned. return; @@ -2373,14 +2371,14 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { StrideNoReuse.insert(*CondStride); } - // If the trip count is computed in terms of an smax (due to ScalarEvolution + // If the trip count is computed in terms of a max (due to ScalarEvolution // being unable to find a sufficient guard, for example), change the loop - // comparison to use SLT instead of NE. - Cond = OptimizeSMax(L, Cond, CondUse); + // comparison to use SLT or ULT instead of NE. + Cond = OptimizeMax(L, Cond, CondUse); // If possible, change stride and operands of the compare instruction to // eliminate one stride. - if (ExitBlock == LatchBlock) + if (ExitingBlock == LatchBlock) Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); // It's possible for the setcc instruction to be anywhere in the loop, and @@ -2397,8 +2395,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // Clone the IVUse, as the old use still exists! IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, - CondUse->getOperandValToReplace(), - false); + CondUse->getOperandValToReplace()); CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); } } @@ -2413,9 +2410,9 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { ++NumLoopCond; } -// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding -// when to exit the loop is used only for that purpose, try to rearrange things -// so it counts down to a test against zero. +/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding +/// when to exit the loop is used only for that purpose, try to rearrange things +/// so it counts down to a test against zero. void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { // If the number of times the loop is executed isn't computable, give up. @@ -2506,7 +2503,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { Value *startVal = phi->getIncomingValue(inBlock); Value *endVal = Cond->getOperand(1); // FIXME check for case where both are constant - ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); + Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, endVal, startVal, "tmp", PreInsertPt); |