summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp1772
1 files changed, 1030 insertions, 742 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 04ee7c8..dee3d38 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -52,30 +52,32 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
-STATISTIC(NumRemoved , "Number of aux indvars removed");
-STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted, "Number of canonical indvars added");
-STATISTIC(NumReplaced, "Number of exit values replaced");
-STATISTIC(NumLFTR , "Number of loop exit tests replaced");
-STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
-
-// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
-// and referenced here.
-namespace llvm {
- extern bool DisableIVRewrite;
-}
+STATISTIC(NumRemoved , "Number of aux indvars removed");
+STATISTIC(NumWidened , "Number of indvars widened");
+STATISTIC(NumInserted , "Number of canonical indvars added");
+STATISTIC(NumReplaced , "Number of exit values replaced");
+STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
+
+static cl::opt<bool> DisableIVRewrite(
+ "disable-iv-rewrite", cl::Hidden,
+ cl::desc("Disable canonical induction variable rewriting"));
namespace {
class IndVarSimplify : public LoopPass {
@@ -84,12 +86,14 @@ namespace {
ScalarEvolution *SE;
DominatorTree *DT;
TargetData *TD;
+
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) {
+ IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+ Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -101,36 +105,46 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<IVUsers>();
+ if (!DisableIVRewrite)
+ AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<IVUsers>();
+ if (!DisableIVRewrite)
+ AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
private:
+ virtual void releaseMemory() {
+ DeadInsts.clear();
+ }
+
bool isValidRewrite(Value *FromVal, Value *ToVal);
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ void RewriteNonIntegerIVs(Loop *L);
+
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
void SimplifyIVUsers(SCEVExpander &Rewriter);
+ void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+ bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void EliminateIVRemainder(BinaryOperator *Rem,
Value *IVOperand,
- bool IsSigned,
- PHINode *IVPhi);
- void RewriteNonIntegerIVs(Loop *L);
+ bool IsSigned);
+
+ void SimplifyCongruentIVs(Loop *L);
+
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
- void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
void SinkUnusedInvariants(Loop *L);
-
- void HandleFloatingPointIV(Loop *L, PHINode *PH);
};
}
@@ -197,156 +211,262 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
return true;
}
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
- BackedgeTakenCount->isZero())
- return false;
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
- if (!L->getExitingBlock())
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+ bool isExact = false;
+ if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
return false;
-
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
+ // See if we can convert this to an int64_t
+ uint64_t UIntVal;
+ if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+ &isExact) != APFloat::opOK || !isExact)
return false;
-
- // Special case: If the backedge-taken count is a UDiv, it's very likely a
- // UDiv that ScalarEvolution produced in order to compute a precise
- // expression, rather than a UDiv from the user's code. If we can't find a
- // UDiv in the code with some simple searching, assume the former and forego
- // rewriting the loop.
- if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return false;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != BackedgeTakenCount) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != BackedgeTakenCount)
- return false;
- }
- }
+ IntVal = UIntVal;
return true;
}
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
+/// HandleFloatingPointIV - If the loop has floating induction variable
+/// then insert corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
///
-/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
-static const Type *getBackedgeIVType(Loop *L) {
- if (!L->getExitingBlock())
- return 0;
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
- return 0;
+ // Check incoming value.
+ ConstantFP *InitValueVal =
+ dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cond)
- return 0;
+ int64_t InitValue;
+ if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+ return;
- const Type *Ty = 0;
- for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
- OI != OE; ++OI) {
- assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
- TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
- if (!Trunc)
- continue;
+ // Check IV increment. Reject this PN if increment operation is not
+ // an add or increment value can not be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
- return Trunc->getSrcTy();
+ // If this is not an add of the PHI with a constantfp, or if the constant fp
+ // is not an integer, bail out.
+ ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ int64_t IncValue;
+ if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+ return;
+
+ // Check Incr uses. One user is PN and the other user is an exit condition
+ // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
+ // only used by a branch, we can't transform it.
+ FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+ if (!Compare)
+ Compare = dyn_cast<FCmpInst>(U2);
+ if (Compare == 0 || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->use_back()))
+ return;
+
+ BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+ // We need to verify that the branch actually controls the iteration count
+ // of the loop. If not, the new IV can overflow and no one will notice.
+ // The branch block must be in the loop and one of the successors must be out
+ // of the loop.
+ assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+ if (!L->contains(TheBr->getParent()) ||
+ (L->contains(TheBr->getSuccessor(0)) &&
+ L->contains(TheBr->getSuccessor(1))))
+ return;
+
+
+ // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+ // transform it.
+ ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ int64_t ExitValue;
+ if (ExitValueVal == 0 ||
+ !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+ return;
+
+ // Find new predicate for integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (Compare->getPredicate()) {
+ default: return; // Unknown comparison.
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
}
- return Ty;
-}
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable. This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
-LinearFunctionTestReplace(Loop *L,
- const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ // We convert the floating point induction variable to a signed i32 value if
+ // we can. This is only safe if the comparison will not overflow in a way
+ // that won't be trapped by the integer equivalent operations. Check for this
+ // now.
+ // TODO: We could use i64 if it is native and the range requires it.
- // If the exiting block is not the same as the backedge block, we must compare
- // against the preincremented value, otherwise we prefer to compare against
- // the post-incremented value.
- Value *CmpIndVar;
- const SCEV *RHS = BackedgeTakenCount;
- if (L->getExitingBlock() == L->getLoopLatch()) {
- // Add one to the "backedge-taken" count to get the trip count.
- // If this addition may overflow, we have to be more pessimistic and
- // cast the induction variable before doing the add.
- const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
- const SCEV *N =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- if ((isa<SCEVConstant>(N) && !N->isZero()) ||
- SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
- // No overflow. Cast the sum.
- RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
- } else {
- // Potential overflow. Cast before doing the add.
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- RHS = SE->getAddExpr(RHS,
- SE->getConstant(IndVar->getType(), 1));
+ // The start/stride/exit values must all fit in signed i32.
+ if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+ return;
+
+ // If not actually striding (add x, 0.0), avoid touching the code.
+ if (IncValue == 0)
+ return;
+
+ // Positive and negative strides have different safety conditions.
+ if (IncValue > 0) {
+ // If we have a positive stride, we require the init to be less than the
+ // exit value and an equality or less than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ return;
+
+ uint32_t Range = uint32_t(ExitValue-InitValue);
+ if (NewPred == CmpInst::ICMP_SLE) {
+ // Normalize SLE -> SLT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
}
- // The BackedgeTaken expression contains the number of times that the
- // backedge branches to the loop header. This is one less than the
- // number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ unsigned Leftover = Range % uint32_t(IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+ return;
+
} else {
- // We have to use the preincremented value...
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- CmpIndVar = IndVar;
+ // If we have a negative stride, we require the init to be greater than the
+ // exit value and an equality or greater than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ return;
+
+ uint32_t Range = uint32_t(InitValue-ExitValue);
+ if (NewPred == CmpInst::ICMP_SGE) {
+ // Normalize SGE -> SGT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(-IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+ return;
}
- // Expand the code for the iteration count.
- assert(SE->isLoopInvariant(RHS, L) &&
- "Computed iteration count is not loop invariant!");
- Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+ const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
- // Insert a new icmp_ne or icmp_eq instruction before the branch.
- ICmpInst::Predicate Opcode;
- if (L->contains(BI->getSuccessor(0)))
- Opcode = ICmpInst::ICMP_NE;
- else
- Opcode = ICmpInst::ICMP_EQ;
+ // Insert new integer induction variable.
+ PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+ NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+ PN->getIncomingBlock(IncomingEdge));
- DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
- << " LHS:" << *CmpIndVar << '\n'
- << " op:\t"
- << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
- << " RHS:\t" << *RHS << "\n");
+ Value *NewAdd =
+ BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+ Incr->getName()+".int", Incr);
+ NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
- ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+ ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+ ConstantInt::get(Int32Ty, ExitValue),
+ Compare->getName());
- Value *OrigCond = BI->getCondition();
- // It's tempting to use replaceAllUsesWith here to fully replace the old
- // comparison, but that's not immediately safe, since users of the old
- // comparison may not be dominated by the new comparison. Instead, just
- // update the branch to use the new comparison; in the common case this
- // will make old comparison dead.
- BI->setCondition(Cond);
- DeadInsts.push_back(OrigCond);
+ // In the following deletions, PN may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PN;
- ++NumLFTR;
- Changed = true;
- return Cond;
+ // Delete the old floating point exit comparison. The branch starts using the
+ // new comparison.
+ NewCompare->takeName(Compare);
+ Compare->replaceAllUsesWith(NewCompare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare);
+
+ // Delete the old floating point increment.
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+ // If the FP induction variable still has uses, this is because something else
+ // in the loop uses its value. In order to canonicalize the induction
+ // variable, we chose to eliminate the IV and rewrite it in terms of an
+ // int->fp cast.
+ //
+ // We give preference to sitofp over uitofp because it is faster on most
+ // platforms.
+ if (WeakPH) {
+ Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+ PN->getParent()->getFirstNonPHI());
+ PN->replaceAllUsesWith(Conv);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+
+ // Add a new IVUsers entry for the newly-created integer PHI.
+ if (IU)
+ IU->AddUsersIfInteresting(NewPHI);
}
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+ // First step. Check to see if there are any floating-point recurrences.
+ // If there are, change them into integer recurrences, permitting analysis by
+ // the SCEV routines.
+ //
+ BasicBlock *Header = L->getHeader();
+
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+ HandleFloatingPointIV(L, PN);
+
+ // If the loop previously had floating-point IV, ScalarEvolution
+ // may not have been able to compute a trip count. Now that we've done some
+ // re-writing, the trip count may be computable.
+ if (Changed)
+ SE->forgetLoop(L);
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
/// RewriteLoopExitValues - Check to see if this loop has a computable
/// loop-invariant execution count. If so, this means that we can compute the
/// final value of any expressions that are recurrent in the loop, and
@@ -460,29 +580,168 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
Rewriter.clearInsertPoint();
}
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
- // First step. Check to see if there are any floating-point recurrences.
- // If there are, change them into integer recurrences, permitting analysis by
- // the SCEV routines.
+//===----------------------------------------------------------------------===//
+// Rewrite IV users based on a canonical IV.
+// To be replaced by -disable-iv-rewrite.
+//===----------------------------------------------------------------------===//
+
+/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
+/// loop. IVUsers is treated as a worklist. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyIVUsersNoRewrite.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+ // Each round of simplification involves a round of eliminating operations
+ // followed by a round of widening IVs. A single IVUsers worklist is used
+ // across all rounds. The inner loop advances the user. If widening exposes
+ // more uses, then another pass through the outer loop is triggered.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ continue;
+ }
+ }
+ }
+}
+
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
+ // Loop-invariant values are safe.
+ if (SE->isLoopInvariant(S, L)) return true;
+
+ // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+ // to transform them into efficient code.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ return AR->isAffine();
+
+ // An add is safe it all its operands are safe.
+ if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+ E = Commutative->op_end(); I != E; ++I)
+ if (!isSafe(*I, L, SE)) return false;
+ return true;
+ }
+
+ // A cast is safe if its operand is.
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return isSafe(C->getOperand(), L, SE);
+
+ // A udiv is safe if its operands are.
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
+
+ // SCEVUnknown is always safe.
+ if (isa<SCEVUnknown>(S))
+ return true;
+
+ // Nothing else is safe.
+ return false;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
//
- BasicBlock *Header = L->getHeader();
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+ const Type *UseTy = Op->getType();
+ Instruction *User = UI->getUser();
- SmallVector<WeakVH, 8> PHIs;
- for (BasicBlock::iterator I = Header->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PHIs.push_back(PN);
+ // Compute the final addrec to expand into code.
+ const SCEV *AR = IU->getReplacementExpr(*UI);
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
- HandleFloatingPointIV(L, PN);
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(UI->getUser())) {
+ const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+ if (SE->isLoopInvariant(ExitVal, L))
+ AR = ExitVal;
+ }
- // If the loop previously had floating-point IV, ScalarEvolution
- // may not have been able to compute a trip count. Now that we've done some
- // re-writing, the trip count may be computable.
- if (Changed)
- SE->forgetLoop(L);
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+ // polynomial evaluations inside of the loop, and the str reduction pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!isSafe(AR, L, SE))
+ continue;
+
+ // Determine the insertion point for this user. By default, insert
+ // immediately before the user. The SCEVExpander class will automatically
+ // hoist loop invariants out of the loop. For PHI nodes, there may be
+ // multiple uses, so compute the nearest common dominator for the
+ // incoming blocks.
+ Instruction *InsertPt = User;
+ if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingValue(i) == Op) {
+ if (InsertPt == User)
+ InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+ else
+ InsertPt =
+ DT->findNearestCommonDominator(InsertPt->getParent(),
+ PHI->getIncomingBlock(i))
+ ->getTerminator();
+ }
+
+ // Now expand it into actual Instructions and patch it into place.
+ Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+
+ if (!isValidRewrite(Op, NewVal)) {
+ DeadInsts.push_back(NewVal);
+ continue;
+ }
+ // Inform ScalarEvolution that this value is changing. The change doesn't
+ // affect its value, but it does potentially affect which use lists the
+ // value will be on after the replacement, which affects ScalarEvolution's
+ // ability to walk use lists and drop dangling pointers when a value is
+ // deleted.
+ SE->forgetValue(User);
+
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
+ NewValI->setDebugLoc(User->getDebugLoc());
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
+ }
}
+//===----------------------------------------------------------------------===//
+// IV Widening - Extend the width of an IV to cover its widest uses.
+//===----------------------------------------------------------------------===//
+
namespace {
// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
@@ -493,33 +752,30 @@ namespace {
WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
};
- typedef std::map<PHINode *, WideIVInfo> WideIVMap;
}
/// CollectExtend - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
-static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned,
- WideIVMap &IVMap, ScalarEvolution *SE,
- const TargetData *TD) {
+static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
+ ScalarEvolution *SE, const TargetData *TD) {
const Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
if (TD && !TD->isLegalInteger(Width))
return;
- WideIVInfo &IVInfo = IVMap[Phi];
- if (!IVInfo.WidestNativeType) {
- IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
- IVInfo.IsSigned = IsSigned;
+ if (!WI.WidestNativeType) {
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ WI.IsSigned = IsSigned;
return;
}
// We extend the IV to satisfy the sign of its first user, arbitrarily.
- if (IVInfo.IsSigned != IsSigned)
+ if (WI.IsSigned != IsSigned)
return;
- if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType))
- IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
namespace {
@@ -529,43 +785,45 @@ namespace {
/// inserting truncs whenever we stop propagating the type.
///
class WidenIV {
+ // Parameters
PHINode *OrigPhi;
const Type *WideType;
bool IsSigned;
- IVUsers *IU;
- LoopInfo *LI;
- Loop *L;
+ // Context
+ LoopInfo *LI;
+ Loop *L;
ScalarEvolution *SE;
- DominatorTree *DT;
- SmallVectorImpl<WeakVH> &DeadInsts;
+ DominatorTree *DT;
+ // Result
PHINode *WidePhi;
Instruction *WideInc;
const SCEV *WideIncExpr;
+ SmallVectorImpl<WeakVH> &DeadInsts;
- SmallPtrSet<Instruction*,16> Processed;
+ SmallPtrSet<Instruction*,16> Widened;
+ SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
public:
- WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers,
- LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree,
+ WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+ ScalarEvolution *SEv, DominatorTree *DTree,
SmallVectorImpl<WeakVH> &DI) :
OrigPhi(PN),
- WideType(IVInfo.WidestNativeType),
- IsSigned(IVInfo.IsSigned),
- IU(IUsers),
+ WideType(WI.WidestNativeType),
+ IsSigned(WI.IsSigned),
LI(LInfo),
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
- DeadInsts(DI),
WidePhi(0),
WideInc(0),
- WideIncExpr(0) {
+ WideIncExpr(0),
+ DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
- bool CreateWideIV(SCEVExpander &Rewriter);
+ PHINode *CreateWideIV(SCEVExpander &Rewriter);
protected:
Instruction *CloneIVUser(Instruction *NarrowUse,
@@ -574,58 +832,13 @@ protected:
const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
- Instruction *WidenIVUse(Instruction *NarrowUse,
- Instruction *NarrowDef,
+ Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
Instruction *WideDef);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
-/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
-/// loop. IVUsers is treated as a worklist. Each successive simplification may
-/// push more users which may themselves be candidates for simplification.
-///
-void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
- WideIVMap IVMap;
-
- // Each round of simplification involves a round of eliminating operations
- // followed by a round of widening IVs. A single IVUsers worklist is used
- // across all rounds. The inner loop advances the user. If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) {
- for(; I != E; ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (DisableIVRewrite) {
- if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
- bool IsSigned = Cast->getOpcode() == Instruction::SExt;
- if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
- CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD);
- continue;
- }
- }
- }
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi());
- continue;
- }
- }
- }
- for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end();
- I != E; ++I) {
- WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts);
- if (Widener.CreateWideIV(Rewriter))
- Changed = true;
- }
- }
-}
-
static Value *getExtend( Value *NarrowOper, const Type *WideType,
bool IsSigned, IRBuilder<> &Builder) {
return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
@@ -671,34 +884,16 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
LHS, RHS,
NarrowBO->getName());
Builder.Insert(WideBO);
- if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
- if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
-
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+ if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+ if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ }
return WideBO;
}
llvm_unreachable(0);
}
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
- if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
-
- const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
- const SCEV *WideExpr = IsSigned ?
- SE->getSignExtendExpr(NarrowExpr, WideType) :
- SE->getZeroExtendExpr(NarrowExpr, WideType);
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
- if (!AddRec || AddRec->getLoop() != L)
- return 0;
-
- return AddRec;
-}
-
/// HoistStep - Attempt to hoist an IV increment above a potential use.
///
/// To successfully hoist, two criteria must be met:
@@ -733,18 +928,41 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
return true;
}
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening it's type? In other words, can the extend be
+// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+// on the same loop. If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType())
+ >= SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return 0;
+ }
+
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+
+ return AddRec;
+}
+
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
- Instruction *NarrowDef,
+Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
Instruction *WideDef) {
- // To be consistent with IVUsers, stop traversing the def-use chain at
- // inner-loop phis or post-loop phis.
- if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
- return 0;
+ Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
- // Handle data flow merges and bizarre phi cycles.
- if (!Processed.insert(NarrowUse))
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
return 0;
// Our raison d'etre! Eliminate sign and zero extension.
@@ -755,7 +973,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
if (CastWidth < IVWidth) {
// The cast isn't as wide as the IV, so insert a Trunc.
- IRBuilder<> Builder(NarrowUse);
+ IRBuilder<> Builder(NarrowDefUse);
NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
}
else {
@@ -775,23 +993,32 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
NarrowUse->replaceAllUsesWith(NewDef);
DeadInsts.push_back(NarrowUse);
}
- // Now that the extend is gone, expose it's uses to IVUsers for potential
- // further simplification within SimplifyIVUsers.
- IU->AddUsersIfInteresting(WideDef, WidePhi);
+ // Now that the extend is gone, we want to expose it's uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
return 0;
}
+
+ // Does this user itself evaluate to a recurrence after widening?
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
if (!WideAddRec) {
// This user does not evaluate to a recurence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- IRBuilder<> Builder(NarrowUse);
+ IRBuilder<> Builder(NarrowDefUse);
Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
return 0;
}
+ // We assume that block terminators are not SCEVable. We wouldn't want to
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
@@ -803,11 +1030,11 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
if (!WideUse)
return 0;
}
- // GetWideRecurrence ensured that the narrow expression could be extended
- // outside the loop without overflow. This suggests that the wide use
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
// evaluates to the same expression as the extended narrow use, but doesn't
// absolutely guarantee it. Hence the following failsafe check. In rare cases
- // where it fails, we simple throw away the newly created wide use.
+ // where it fails, we simply throw away the newly created wide use.
if (WideAddRec != SE->getSCEV(WideUse)) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
@@ -819,21 +1046,36 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
return WideUse;
}
+/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+///
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ for (Value::use_iterator UI = NarrowDef->use_begin(),
+ UE = NarrowDef->use_end(); UI != UE; ++UI) {
+ Use &U = UI.getUse();
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(cast<Instruction>(U.getUser())))
+ continue;
+
+ NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef));
+ }
+}
+
/// CreateWideIV - Process a single induction variable. First use the
/// SCEVExpander to create a wide induction variable that evaluates to the same
/// recurrence as the original narrow IV. Then use a worklist to forward
-/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
/// interesting IV users, the narrow IV will be isolated for removal by
/// DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
-bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
- return false;
+ return NULL;
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
@@ -846,9 +1088,9 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Can the IV be extended outside the loop without overflow?
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
- return false;
+ return NULL;
- // An AddRec must have loop-invariant operands. Since this AddRec it
+ // An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
// operands, so they must dominate the loop header.
assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
@@ -876,39 +1118,37 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
++NumWidened;
// Traverse the def-use chain using a worklist starting at the original IV.
- assert(Processed.empty() && "expect initial state" );
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
- // Each worklist entry has a Narrow def-use link and Wide def.
- SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
- for (Value::use_iterator UI = OrigPhi->use_begin(),
- UE = OrigPhi->use_end(); UI != UE; ++UI) {
- NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi));
- }
while (!NarrowIVUsers.empty()) {
- Use *NarrowDefUse;
+ Use *UsePtr;
Instruction *WideDef;
- tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val();
+ tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
+ Use &NarrowDefUse = *UsePtr;
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get());
- Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser());
- Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef);
+ Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
+ Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
// Follow all def-use edges from the previous narrow use.
- if (WideUse) {
- for (Value::use_iterator UI = NarrowUse->use_begin(),
- UE = NarrowUse->use_end(); UI != UE; ++UI) {
- NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse));
- }
- }
+ if (WideUse)
+ pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse);
+
// WidenIVUse may have removed the def-use edge.
if (NarrowDef->use_empty())
DeadInsts.push_back(NarrowDef);
}
- return true;
+ return WidePhi;
}
+//===----------------------------------------------------------------------===//
+// Simplification of IV users based on SCEV evaluation.
+//===----------------------------------------------------------------------===//
+
void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
@@ -945,8 +1185,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
Value *IVOperand,
- bool IsSigned,
- PHINode *IVPhi) {
+ bool IsSigned) {
// We're only interested in the case where we know something about
// the numerator.
if (IVOperand != Rem->getOperand(0))
@@ -989,15 +1228,465 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
}
// Inform IVUsers about the new users.
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I, IVPhi);
-
+ if (IU) {
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+ }
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
Changed = true;
DeadInsts.push_back(Rem);
}
+/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ return true;
+ }
+ }
+
+ // Eliminate any operation that SCEV can prove is an identity function.
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst);
+ return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInsteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // We assume that terminators are not SCEVable.
+ assert((!S || I != I->getParent()->getTerminator()) &&
+ "can't fold terminators");
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
+/// of IV users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
+/// simplifies instructions in-place during analysis. Rather than rewriting
+/// induction variables bottom-up from their users, it transforms a chain of
+/// IVUsers top-down, updating the IR only when it encouters a clear
+/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still
+/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
+/// extend elimination.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
+ std::map<PHINode *, WideIVInfo> WideIVMap;
+
+ SmallVector<PHINode*, 8> LoopPhis;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ LoopPhis.push_back(cast<PHINode>(I));
+ }
+ // Each round of simplification iterates through the SimplifyIVUsers worklist
+ // for all current phis, then determines whether any IVs can be
+ // widened. Widening adds new phis to LoopPhis, inducing another round of
+ // simplification on the wide IVs.
+ while (!LoopPhis.empty()) {
+ // Evaluate as many IV expressions as possible before widening any IVs. This
+ // forces SCEV to set no-wrap flags before evaluating sign/zero
+ // extension. The first time SCEV attempts to normalize sign/zero extension,
+ // the result becomes final. So for the most predictable results, we delay
+ // evaluation of sign/zero extend evaluation until needed, and avoid running
+ // other SCEV based analysis prior to SimplifyIVUsersNoRewrite.
+ do {
+ PHINode *CurrIV = LoopPhis.pop_back_val();
+
+ // Information about sign/zero extensions of CurrIV.
+ WideIVInfo WI;
+
+ // Instructions processed by SimplifyIVUsers for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs if IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ Instruction *UseInst, *Operand;
+ tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+ // Bypass back edges to avoid extra work.
+ if (UseInst == CurrIV) continue;
+
+ if (EliminateIVUser(UseInst, Operand)) {
+ pushIVUsers(Operand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+ CollectExtend(Cast, IsSigned, WI, SE, TD);
+ }
+ continue;
+ }
+ if (isSimpleIVUser(UseInst, L, SE)) {
+ pushIVUsers(UseInst, Simplified, SimpleIVUsers);
+ }
+ }
+ if (WI.WidestNativeType) {
+ WideIVMap[CurrIV] = WI;
+ }
+ } while(!LoopPhis.empty());
+
+ for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
+ E = WideIVMap.end(); I != E; ++I) {
+ WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ Changed = true;
+ LoopPhis.push_back(WidePhi);
+ }
+ }
+ WideIVMap.clear();
+ }
+}
+
+/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
+/// populate ExprToIVMap for use later.
+///
+void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEV *S = SE->getSCEV(Phi);
+ DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
+ bool Inserted;
+ tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
+ if (Inserted)
+ continue;
+ PHINode *OrigPhi = Pos->second;
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's almost certain to be the head of an IV
+ // user cycle that is isomorphic with the original phi. So it's worth
+ // eagerly cleaning up the common case of a single IV increment.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+ if (OrigInc != IsomorphicInc &&
+ SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
+ HoistStep(OrigInc, IsomorphicInc, DT)) {
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ IsomorphicInc->replaceAllUsesWith(OrigInc);
+ DeadInsts.push_back(IsomorphicInc);
+ }
+ }
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+ ++NumElimIV;
+ Phi->replaceAllUsesWith(OrigPhi);
+ DeadInsts.push_back(Phi);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
+
+/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
+/// count expression can be safely and cheaply expanded into an instruction
+/// sequence that can be used by LinearFunctionTestReplace.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+ BackedgeTakenCount->isZero())
+ return false;
+
+ if (!L->getExitingBlock())
+ return false;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return false;
+
+ // Special case: If the backedge-taken count is a UDiv, it's very likely a
+ // UDiv that ScalarEvolution produced in order to compute a precise
+ // expression, rather than a UDiv from the user's code. If we can't find a
+ // UDiv in the code with some simple searching, assume the former and forego
+ // rewriting the loop.
+ if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return false;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != BackedgeTakenCount) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != BackedgeTakenCount)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getBackedgeIVType - Get the widest type used by the loop test after peeking
+/// through Truncs.
+///
+/// TODO: Unnecessary if LFTR does not force a canonical IV.
+static const Type *getBackedgeIVType(Loop *L) {
+ if (!L->getExitingBlock())
+ return 0;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return 0;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return 0;
+
+ const Type *Ty = 0;
+ for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+ OI != OE; ++OI) {
+ assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
+ TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
+ if (!Trunc)
+ continue;
+
+ return Trunc->getSrcTy();
+ }
+ return Ty;
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable. This pass is able to rewrite the exit tests of any loop where the
+/// SCEV analysis can determine a loop-invariant trip count of the loop, which
+/// is actually a much broader range than just linear tests.
+ICmpInst *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+ const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ SCEVExpander &Rewriter) {
+ assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+
+ // If the exiting block is not the same as the backedge block, we must compare
+ // against the preincremented value, otherwise we prefer to compare against
+ // the post-incremented value.
+ Value *CmpIndVar;
+ const SCEV *RHS = BackedgeTakenCount;
+ if (L->getExitingBlock() == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // If this addition may overflow, we have to be more pessimistic and
+ // cast the induction variable before doing the add.
+ const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
+ const SCEV *N =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+ } else {
+ // Potential overflow. Cast before doing the add.
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ RHS = SE->getAddExpr(RHS,
+ SE->getConstant(IndVar->getType(), 1));
+ }
+
+ // The BackedgeTaken expression contains the number of times that the
+ // backedge branches to the loop header. This is one less than the
+ // number of times the loop executes, so use the incremented indvar.
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ } else {
+ // We have to use the preincremented value...
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ CmpIndVar = IndVar;
+ }
+
+ // Expand the code for the iteration count.
+ assert(SE->isLoopInvariant(RHS, L) &&
+ "Computed iteration count is not loop invariant!");
+ Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+
+ // Insert a new icmp_ne or icmp_eq instruction before the branch.
+ ICmpInst::Predicate Opcode;
+ if (L->contains(BI->getSuccessor(0)))
+ Opcode = ICmpInst::ICMP_NE;
+ else
+ Opcode = ICmpInst::ICMP_EQ;
+
+ DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t"
+ << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *RHS << "\n");
+
+ ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+ Cond->setDebugLoc(BI->getDebugLoc());
+ Value *OrigCond = BI->getCondition();
+ // It's tempting to use replaceAllUsesWith here to fully replace the old
+ // comparison, but that's not immediately safe, since users of the old
+ // comparison may not be dominated by the new comparison. Instead, just
+ // update the branch to use the new comparison; in the common case this
+ // will make old comparison dead.
+ BI->setCondition(Cond);
+ DeadInsts.push_back(OrigCond);
+
+ ++NumLFTR;
+ Changed = true;
+ return Cond;
+}
+
+//===----------------------------------------------------------------------===//
+// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+//===----------------------------------------------------------------------===//
+
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator I = Preheader->getTerminator();
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Also don't
+ // move instructions which might read memory, since the loop may modify
+ // memory. Note that it's okay if the instruction might have undefined
+ // behavior: LoopSimplify guarantees that the preheader dominates the exit
+ // block.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ continue;
+
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Don't sink static AllocaInsts out of the entry block, which would
+ // turn them into dynamic allocas!
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AI->isStaticAlloca())
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+
+ // If there is, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+
+ // Otherwise, sink it to the exit block.
+ Instruction *ToMove = I;
+ bool Done = false;
+
+ if (I != Preheader->begin()) {
+ // Skip debug info intrinsics.
+ do {
+ --I;
+ } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+ if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ Done = true;
+ } else {
+ Done = true;
+ }
+
+ ToMove->moveBefore(InsertPt);
+ if (Done) break;
+ InsertPt = ToMove;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IndVarSimplify driver. Manage several subpasses of IV simplification.
+//===----------------------------------------------------------------------===//
+
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -1010,7 +1699,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- IU = &getAnalysis<IVUsers>();
+ if (!DisableIVRewrite)
+ IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
@@ -1026,9 +1716,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Create a rewriter object which we'll use to transform the code with.
- SCEVExpander Rewriter(*SE);
- if (DisableIVRewrite)
+ SCEVExpander Rewriter(*SE, "indvars");
+
+ // Eliminate redundant IV users.
+ //
+ // Simplification works best when run before other consumers of SCEV. We
+ // attempt to avoid evaluating SCEVs for sign/zero extend operations until
+ // other expressions involving loop IVs have been evaluated. This helps SCEV
+ // set no-wrap flags before normalizing sign/zero extension.
+ if (DisableIVRewrite) {
Rewriter.disableCanonicalMode();
+ SimplifyIVUsersNoRewrite(L, Rewriter);
+ }
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -1040,7 +1739,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
RewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV users.
- SimplifyIVUsers(Rewriter);
+ if (!DisableIVRewrite)
+ SimplifyIVUsers(Rewriter);
+
+ // Eliminate redundant IV cycles.
+ if (DisableIVRewrite)
+ SimplifyCongruentIVs(L);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
@@ -1119,8 +1823,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
"canonical IV disrupted BackedgeTaken expansion");
assert(NeedCannIV &&
"LinearFunctionTestReplace requires a canonical induction variable");
- NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
- Rewriter);
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must do it. This does not work well for
+ // loop passes because SCEVExpander makes assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ NewICmp =
+ LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
}
// Rewrite IV-derived expressions.
if (!DisableIVRewrite)
@@ -1146,9 +1860,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// For completeness, inform IVUsers of the IV use in the newly-created
// loop exit test instruction.
- if (NewICmp)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)),
- IndVar);
+ if (NewICmp && IU)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -1156,428 +1869,3 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
return Changed;
}
-
-// FIXME: It is an extremely bad idea to indvar substitute anything more
-// complex than affine induction variables. Doing so will put expensive
-// polynomial evaluations inside of the loop, and the str reduction pass
-// currently can only reduce affine polynomials. For now just disable
-// indvar subst on anything more complex than an affine addrec, unless
-// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
- // Loop-invariant values are safe.
- if (SE->isLoopInvariant(S, L)) return true;
-
- // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
- // to transform them into efficient code.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- return AR->isAffine();
-
- // An add is safe it all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
- E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L, SE)) return false;
- return true;
- }
-
- // A cast is safe if its operand is.
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L, SE);
-
- // A udiv is safe if its operands are.
- if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L, SE) &&
- isSafe(UD->getRHS(), L, SE);
-
- // SCEVUnknown is always safe.
- if (isa<SCEVUnknown>(S))
- return true;
-
- // Nothing else is safe.
- return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- // Rewrite all induction variable expressions in terms of the canonical
- // induction variable.
- //
- // If there were induction variables of other sizes or offsets, manually
- // add the offsets to the primary induction variable and cast, avoiding
- // the need for the code evaluation methods to insert induction variables
- // of different sizes.
- for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
- const Type *UseTy = Op->getType();
- Instruction *User = UI->getUser();
-
- // Compute the final addrec to expand into code.
- const SCEV *AR = IU->getReplacementExpr(*UI);
-
- // Evaluate the expression out of the loop, if possible.
- if (!L->contains(UI->getUser())) {
- const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (SE->isLoopInvariant(ExitVal, L))
- AR = ExitVal;
- }
-
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec, unless
- // it can be expanded to a trivial value.
- if (!isSafe(AR, L, SE))
- continue;
-
- // Determine the insertion point for this user. By default, insert
- // immediately before the user. The SCEVExpander class will automatically
- // hoist loop invariants out of the loop. For PHI nodes, there may be
- // multiple uses, so compute the nearest common dominator for the
- // incoming blocks.
- Instruction *InsertPt = User;
- if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingValue(i) == Op) {
- if (InsertPt == User)
- InsertPt = PHI->getIncomingBlock(i)->getTerminator();
- else
- InsertPt =
- DT->findNearestCommonDominator(InsertPt->getParent(),
- PHI->getIncomingBlock(i))
- ->getTerminator();
- }
-
- // Now expand it into actual Instructions and patch it into place.
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
-
- if (!isValidRewrite(Op, NewVal)) {
- DeadInsts.push_back(NewVal);
- continue;
- }
- // Inform ScalarEvolution that this value is changing. The change doesn't
- // affect its value, but it does potentially affect which use lists the
- // value will be on after the replacement, which affects ScalarEvolution's
- // ability to walk use lists and drop dangling pointers when a value is
- // deleted.
- SE->forgetValue(User);
-
- // Patch the new value into place.
- if (Op->hasName())
- NewVal->takeName(Op);
- User->replaceUsesOfWith(Op, NewVal);
- UI->setOperandValToReplace(NewVal);
-
- ++NumRemoved;
- Changed = true;
-
- // The old value may be dead now.
- DeadInsts.push_back(Op);
- }
-}
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
- BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock) return;
-
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) return;
-
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
- BasicBlock::iterator I = Preheader->getTerminator();
- while (I != Preheader->begin()) {
- --I;
- // New instructions were inserted at the end of the preheader.
- if (isa<PHINode>(I))
- break;
-
- // Don't move instructions which might have side effects, since the side
- // effects need to complete before instructions inside the loop. Also don't
- // move instructions which might read memory, since the loop may modify
- // memory. Note that it's okay if the instruction might have undefined
- // behavior: LoopSimplify guarantees that the preheader dominates the exit
- // block.
- if (I->mayHaveSideEffects() || I->mayReadFromMemory())
- continue;
-
- // Skip debug info intrinsics.
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- // Don't sink static AllocaInsts out of the entry block, which would
- // turn them into dynamic allocas!
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- if (AI->isStaticAlloca())
- continue;
-
- // Determine if there is a use in or before the loop (direct or
- // otherwise).
- bool UsedInLoop = false;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- User *U = *UI;
- BasicBlock *UseBB = cast<Instruction>(U)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(U)) {
- unsigned i =
- PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
- UseBB = P->getIncomingBlock(i);
- }
- if (UseBB == Preheader || L->contains(UseBB)) {
- UsedInLoop = true;
- break;
- }
- }
-
- // If there is, the def must remain in the preheader.
- if (UsedInLoop)
- continue;
-
- // Otherwise, sink it to the exit block.
- Instruction *ToMove = I;
- bool Done = false;
-
- if (I != Preheader->begin()) {
- // Skip debug info intrinsics.
- do {
- --I;
- } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
-
- if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
- Done = true;
- } else {
- Done = true;
- }
-
- ToMove->moveBefore(InsertPt);
- if (Done) break;
- InsertPt = ToMove;
- }
-}
-
-/// ConvertToSInt - Convert APF to an integer, if possible.
-static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
- bool isExact = false;
- if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
- return false;
- // See if we can convert this to an int64_t
- uint64_t UIntVal;
- if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
- &isExact) != APFloat::opOK || !isExact)
- return false;
- IntVal = UIntVal;
- return true;
-}
-
-/// HandleFloatingPointIV - If the loop has floating induction variable
-/// then insert corresponding integer induction variable if possible.
-/// For example,
-/// for(double i = 0; i < 10000; ++i)
-/// bar(i)
-/// is converted into
-/// for(int i = 0; i < 10000; ++i)
-/// bar((double)i);
-///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
- unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
- unsigned BackEdge = IncomingEdge^1;
-
- // Check incoming value.
- ConstantFP *InitValueVal =
- dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
-
- int64_t InitValue;
- if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
- return;
-
- // Check IV increment. Reject this PN if increment operation is not
- // an add or increment value can not be represented by an integer.
- BinaryOperator *Incr =
- dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
-
- // If this is not an add of the PHI with a constantfp, or if the constant fp
- // is not an integer, bail out.
- ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
- int64_t IncValue;
- if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
- !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
- return;
-
- // Check Incr uses. One user is PN and the other user is an exit condition
- // used by the conditional terminator.
- Value::use_iterator IncrUse = Incr->use_begin();
- Instruction *U1 = cast<Instruction>(*IncrUse++);
- if (IncrUse == Incr->use_end()) return;
- Instruction *U2 = cast<Instruction>(*IncrUse++);
- if (IncrUse != Incr->use_end()) return;
-
- // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
- // only used by a branch, we can't transform it.
- FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
- if (!Compare)
- Compare = dyn_cast<FCmpInst>(U2);
- if (Compare == 0 || !Compare->hasOneUse() ||
- !isa<BranchInst>(Compare->use_back()))
- return;
-
- BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
-
- // We need to verify that the branch actually controls the iteration count
- // of the loop. If not, the new IV can overflow and no one will notice.
- // The branch block must be in the loop and one of the successors must be out
- // of the loop.
- assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
- if (!L->contains(TheBr->getParent()) ||
- (L->contains(TheBr->getSuccessor(0)) &&
- L->contains(TheBr->getSuccessor(1))))
- return;
-
-
- // If it isn't a comparison with an integer-as-fp (the exit value), we can't
- // transform it.
- ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
- int64_t ExitValue;
- if (ExitValueVal == 0 ||
- !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
- return;
-
- // Find new predicate for integer comparison.
- CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
- switch (Compare->getPredicate()) {
- default: return; // Unknown comparison.
- case CmpInst::FCMP_OEQ:
- case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
- case CmpInst::FCMP_ONE:
- case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
- case CmpInst::FCMP_OGT:
- case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
- case CmpInst::FCMP_OGE:
- case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
- case CmpInst::FCMP_OLT:
- case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
- case CmpInst::FCMP_OLE:
- case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
- }
-
- // We convert the floating point induction variable to a signed i32 value if
- // we can. This is only safe if the comparison will not overflow in a way
- // that won't be trapped by the integer equivalent operations. Check for this
- // now.
- // TODO: We could use i64 if it is native and the range requires it.
-
- // The start/stride/exit values must all fit in signed i32.
- if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
- return;
-
- // If not actually striding (add x, 0.0), avoid touching the code.
- if (IncValue == 0)
- return;
-
- // Positive and negative strides have different safety conditions.
- if (IncValue > 0) {
- // If we have a positive stride, we require the init to be less than the
- // exit value and an equality or less than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
- return;
-
- uint32_t Range = uint32_t(ExitValue-InitValue);
- if (NewPred == CmpInst::ICMP_SLE) {
- // Normalize SLE -> SLT, check for infinite loop.
- if (++Range == 0) return; // Range overflows.
- }
-
- unsigned Leftover = Range % uint32_t(IncValue);
-
- // If this is an equality comparison, we require that the strided value
- // exactly land on the exit value, otherwise the IV condition will wrap
- // around and do things the fp IV wouldn't.
- if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
- Leftover != 0)
- return;
-
- // If the stride would wrap around the i32 before exiting, we can't
- // transform the IV.
- if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
- return;
-
- } else {
- // If we have a negative stride, we require the init to be greater than the
- // exit value and an equality or greater than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
- return;
-
- uint32_t Range = uint32_t(InitValue-ExitValue);
- if (NewPred == CmpInst::ICMP_SGE) {
- // Normalize SGE -> SGT, check for infinite loop.
- if (++Range == 0) return; // Range overflows.
- }
-
- unsigned Leftover = Range % uint32_t(-IncValue);
-
- // If this is an equality comparison, we require that the strided value
- // exactly land on the exit value, otherwise the IV condition will wrap
- // around and do things the fp IV wouldn't.
- if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
- Leftover != 0)
- return;
-
- // If the stride would wrap around the i32 before exiting, we can't
- // transform the IV.
- if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
- return;
- }
-
- const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
-
- // Insert new integer induction variable.
- PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
- NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
- PN->getIncomingBlock(IncomingEdge));
-
- Value *NewAdd =
- BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
- Incr->getName()+".int", Incr);
- NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
-
- ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
- ConstantInt::get(Int32Ty, ExitValue),
- Compare->getName());
-
- // In the following deletions, PN may become dead and may be deleted.
- // Use a WeakVH to observe whether this happens.
- WeakVH WeakPH = PN;
-
- // Delete the old floating point exit comparison. The branch starts using the
- // new comparison.
- NewCompare->takeName(Compare);
- Compare->replaceAllUsesWith(NewCompare);
- RecursivelyDeleteTriviallyDeadInstructions(Compare);
-
- // Delete the old floating point increment.
- Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
- RecursivelyDeleteTriviallyDeadInstructions(Incr);
-
- // If the FP induction variable still has uses, this is because something else
- // in the loop uses its value. In order to canonicalize the induction
- // variable, we chose to eliminate the IV and rewrite it in terms of an
- // int->fp cast.
- //
- // We give preference to sitofp over uitofp because it is faster on most
- // platforms.
- if (WeakPH) {
- Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstNonPHI());
- PN->replaceAllUsesWith(Conv);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
- }
-
- // Add a new IVUsers entry for the newly-created integer PHI.
- IU->AddUsersIfInteresting(NewPHI, NewPHI);
-}
OpenPOWER on IntegriCloud