path: root/lib/Transforms/Scalar
author     dim <dim@FreeBSD.org>  2011-06-12 15:42:51 +0000
committer  dim <dim@FreeBSD.org>  2011-06-12 15:42:51 +0000
commit     ece02cd5829cea836e9365b0845a8ef042d17b0a (patch)
tree       b3032e51d630e8070e9e08d6641648f195316a80 /lib/Transforms/Scalar
parent     2b066988909948dc3d53d01760bc2d71d32f3feb (diff)
Vendor import of llvm trunk r132879:
http://llvm.org/svn/llvm-project/llvm/trunk@132879
Diffstat (limited to 'lib/Transforms/Scalar')
 lib/Transforms/Scalar/CodeGenPrepare.cpp            |  26
 lib/Transforms/Scalar/GVN.cpp                       |  15
 lib/Transforms/Scalar/IndVarSimplify.cpp            | 761
 lib/Transforms/Scalar/JumpThreading.cpp             |   9
 lib/Transforms/Scalar/LICM.cpp                      |  29
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp        | 115
 lib/Transforms/Scalar/LoopStrengthReduce.cpp        |  80
 lib/Transforms/Scalar/LoopUnswitch.cpp              |  48
 lib/Transforms/Scalar/MemCpyOptimizer.cpp           |  45
 lib/Transforms/Scalar/SCCP.cpp                      |   2
 lib/Transforms/Scalar/ScalarReplAggregates.cpp      |  54
 lib/Transforms/Scalar/SimplifyCFGPass.cpp           |   3
 lib/Transforms/Scalar/TailRecursionElimination.cpp  |   9
13 files changed, 941 insertions, 255 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 0184390..0af14ed 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -147,7 +147,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!DisableBranchOpts) {
MadeChange = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- MadeChange |= ConstantFoldTerminator(BB);
+ MadeChange |= ConstantFoldTerminator(BB, true);
if (MadeChange)
ModifiedDT = true;
@@ -371,9 +371,11 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
- if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+ TargetLowering::TypePromoteInteger)
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
- if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+ TargetLowering::TypePromoteInteger)
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
// If, after promotion, these are the same types, this is a noop copy.
@@ -548,7 +550,23 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
-
+
+ // llvm.dbg.value is far away from the value then iSel may not be able
+ // handle it properly. iSel will drop llvm.dbg.value if it can not
+ // find a node corresponding to the value.
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI))
+ if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()))
+ if (!VI->isTerminator() &&
+ (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) {
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(VI->getParent()->getFirstNonPHI());
+ else
+ DVI->insertAfter(VI);
+ return true;
+ }
+
// We'll need TargetData from here on out.
const TargetData *TD = TLI ? TLI->getTargetData() : 0;
if (!TD) return false;
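
For readability outside diff context, the dbg.value relocation added above can be expressed as a standalone sketch (LLVM API of this vintage, pass boilerplate omitted; the helper name is mine, not from the patch):

    #include "llvm/IntrinsicInst.h"        // DbgValueInst
    #include "llvm/Analysis/Dominators.h"  // DominatorTree

    using namespace llvm;

    // If a dbg.value intrinsic sits far from the instruction defining its value,
    // isel may fail to find a node for it and silently drop the debug info.
    // Moving the intrinsic right next to the definition avoids that.
    static bool sinkDbgValueToDef(CallInst *CI, DominatorTree *DT) {
      DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI);
      if (!DVI) return false;
      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
      if (!VI || VI->isTerminator())
        return false;
      // Only move it when it lives in another block or precedes its definition.
      if (DVI->getParent() == VI->getParent() && !DT->dominates(DVI, VI))
        return false;
      DVI->removeFromParent();
      if (isa<PHINode>(VI))
        DVI->insertBefore(VI->getParent()->getFirstNonPHI());
      else
        DVI->insertAfter(VI);
      return true;
    }
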
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index efecb97..2515fd1 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -952,12 +952,12 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
DestPTy = PointerType::get(DestPTy,
cast<PointerType>(PtrVal->getType())->getAddressSpace());
-
+ Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
NewLoad->takeName(SrcVal);
NewLoad->setAlignment(SrcVal->getAlignment());
-
+
DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
@@ -1576,6 +1576,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ // Transfer DebugLoc.
+ NewLoad->setDebugLoc(LI->getDebugLoc());
+
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
NewLoad));
@@ -1604,6 +1607,11 @@ bool GVN::processLoad(LoadInst *L) {
if (L->isVolatile())
return false;
+ if (L->use_empty()) {
+ markInstructionForDeletion(L);
+ return true;
+ }
+
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
@@ -2099,6 +2107,7 @@ bool GVN::performPRE(Function &F) {
PREInstr->insertBefore(PREPred->getTerminator());
PREInstr->setName(CurInst->getName() + ".pre");
+ PREInstr->setDebugLoc(CurInst->getDebugLoc());
predMap[PREPred] = PREInstr;
VN.add(PREInstr, ValNo);
++NumGVNPRE;
@@ -2118,7 +2127,7 @@ bool GVN::performPRE(Function &F) {
VN.add(Phi, ValNo);
addToLeaderTable(ValNo, Phi, CurrentBlock);
-
+ Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
if (Phi->getType()->isPointerTy()) {
// Because we have added a PHI-use of the pointer value, it has now
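
The debug-location plumbing above matters most when GVN manufactures brand-new loads during PRE. A hypothetical input with a partially redundant load (not taken from the patch) looks like the following; the load that PRE inserts on the edge where the value is unavailable is what now inherits the original load's DebugLoc:

    // One path already loads *p, the other does not, so the second load of *p
    // is only partially redundant. GVN's load-PRE inserts a load in the
    // predecessor where it is missing and merges the two with a PHI.
    int f(int *p, bool c) {
      int v = 0;
      if (c)
        v = *p;        // load available on this edge only
      return v + *p;   // partially redundant load eliminated by PRE
    }
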
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 09d569a..04ee7c8 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -52,20 +52,30 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumRemoved , "Number of aux indvars removed");
+STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumInserted, "Number of canonical indvars added");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+
+// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
+// and referenced here.
+namespace llvm {
+ extern bool DisableIVRewrite;
+}
namespace {
class IndVarSimplify : public LoopPass {
@@ -73,12 +83,13 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
+ TargetData *TD;
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID) {
+ IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -101,15 +112,18 @@ namespace {
private:
bool isValidRewrite(Value *FromVal, Value *ToVal);
- void EliminateIVComparisons();
- void EliminateIVRemainders();
+ void SimplifyIVUsers(SCEVExpander &Rewriter);
+ void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+ void EliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned,
+ PHINode *IVPhi);
void RewriteNonIntegerIVs(Loop *L);
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- BasicBlock *ExitingBlock,
- BranchInst *BI,
- SCEVExpander &Rewriter);
+ PHINode *IndVar,
+ SCEVExpander &Rewriter);
+
void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
@@ -122,7 +136,7 @@ namespace {
char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
- "Canonicalize Induction Variables", false, false)
+ "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
@@ -130,7 +144,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
- "Canonicalize Induction Variables", false, false)
+ "Induction Variable Simplification", false, false)
Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
@@ -183,17 +197,23 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
return true;
}
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable. This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
- const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- BasicBlock *ExitingBlock,
- BranchInst *BI,
- SCEVExpander &Rewriter) {
+/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
+/// count expression can be safely and cheaply expanded into an instruction
+/// sequence that can be used by LinearFunctionTestReplace.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+ BackedgeTakenCount->isZero())
+ return false;
+
+ if (!L->getExitingBlock())
+ return false;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return false;
+
// Special case: If the backedge-taken count is a UDiv, it's very likely a
// UDiv that ScalarEvolution produced in order to compute a precise
// expression, rather than a UDiv from the user's code. If we can't find a
@@ -201,23 +221,68 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// rewriting the loop.
if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return 0;
+ if (!OrigCond) return false;
const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
if (R != BackedgeTakenCount) {
const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
if (L != BackedgeTakenCount)
- return 0;
+ return false;
}
}
+ return true;
+}
+
+/// getBackedgeIVType - Get the widest type used by the loop test after peeking
+/// through Truncs.
+///
+/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
+static const Type *getBackedgeIVType(Loop *L) {
+ if (!L->getExitingBlock())
+ return 0;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return 0;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return 0;
+
+ const Type *Ty = 0;
+ for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+ OI != OE; ++OI) {
+ assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
+ TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
+ if (!Trunc)
+ continue;
+
+ return Trunc->getSrcTy();
+ }
+ return Ty;
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable. This pass is able to rewrite the exit tests of any loop where the
+/// SCEV analysis can determine a loop-invariant trip count of the loop, which
+/// is actually a much broader range than just linear tests.
+ICmpInst *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+ const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ SCEVExpander &Rewriter) {
+ assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
// If the exiting block is not the same as the backedge block, we must compare
// against the preincremented value, otherwise we prefer to compare against
// the post-incremented value.
Value *CmpIndVar;
const SCEV *RHS = BackedgeTakenCount;
- if (ExitingBlock == L->getLoopLatch()) {
+ if (L->getExitingBlock() == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
@@ -240,7 +305,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock);
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
} else {
// We have to use the preincremented value...
RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
@@ -418,96 +483,519 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
SE->forgetLoop(L);
}
-void IndVarSimplify::EliminateIVComparisons() {
- // Look for ICmp users.
- for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
- IVStrideUse &UI = *I;
- ICmpInst *ICmp = dyn_cast<ICmpInst>(UI.getUser());
- if (!ICmp) continue;
-
- bool Swapped = UI.getOperandValToReplace() == ICmp->getOperand(1);
- ICmpInst::Predicate Pred = ICmp->getPredicate();
- if (Swapped) Pred = ICmpInst::getSwappedPredicate(Pred);
-
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = IU->getReplacementExpr(UI);
- const SCEV *X = SE->getSCEV(ICmp->getOperand(!Swapped));
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // If the condition is always true or always false, replace it with
- // a constant value.
- if (SE->isKnownPredicate(Pred, S, X))
- ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
- else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
- ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
- else
- continue;
+namespace {
+ // Collect information about induction variables that are used by sign/zero
+ // extend operations. This information is recorded by CollectExtend and
+ // provides the input to WidenIV.
+ struct WideIVInfo {
+ const Type *WidestNativeType; // Widest integer type created [sz]ext
+ bool IsSigned; // Was an sext user seen before a zext?
+
+ WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
+ };
+ typedef std::map<PHINode *, WideIVInfo> WideIVMap;
+}
+
+/// CollectExtend - Update information about the induction variable that is
+/// extended by this sign or zero extend operation. This is used to determine
+/// the final width of the IV before actually widening it.
+static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned,
+ WideIVMap &IVMap, ScalarEvolution *SE,
+ const TargetData *TD) {
+ const Type *Ty = Cast->getType();
+ uint64_t Width = SE->getTypeSizeInBits(Ty);
+ if (TD && !TD->isLegalInteger(Width))
+ return;
- DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
- DeadInsts.push_back(ICmp);
+ WideIVInfo &IVInfo = IVMap[Phi];
+ if (!IVInfo.WidestNativeType) {
+ IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ IVInfo.IsSigned = IsSigned;
+ return;
}
+
+ // We extend the IV to satisfy the sign of its first user, arbitrarily.
+ if (IVInfo.IsSigned != IsSigned)
+ return;
+
+ if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType))
+ IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
-void IndVarSimplify::EliminateIVRemainders() {
- // Look for SRem and URem users.
- for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
- IVStrideUse &UI = *I;
- BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser());
- if (!Rem) continue;
+namespace {
+/// WidenIV - The goal of this transform is to remove sign and zero extends
+/// without creating any new induction variables. To do this, it creates a new
+/// phi of the wider type and redirects all users, either removing extends or
+/// inserting truncs whenever we stop propagating the type.
+///
+class WidenIV {
+ PHINode *OrigPhi;
+ const Type *WideType;
+ bool IsSigned;
+
+ IVUsers *IU;
+ LoopInfo *LI;
+ Loop *L;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ SmallVectorImpl<WeakVH> &DeadInsts;
+
+ PHINode *WidePhi;
+ Instruction *WideInc;
+ const SCEV *WideIncExpr;
+
+ SmallPtrSet<Instruction*,16> Processed;
+
+public:
+ WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers,
+ LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree,
+ SmallVectorImpl<WeakVH> &DI) :
+ OrigPhi(PN),
+ WideType(IVInfo.WidestNativeType),
+ IsSigned(IVInfo.IsSigned),
+ IU(IUsers),
+ LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())),
+ SE(SEv),
+ DT(DTree),
+ DeadInsts(DI),
+ WidePhi(0),
+ WideInc(0),
+ WideIncExpr(0) {
+ assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+ }
- bool isSigned = Rem->getOpcode() == Instruction::SRem;
- if (!isSigned && Rem->getOpcode() != Instruction::URem)
- continue;
+ bool CreateWideIV(SCEVExpander &Rewriter);
- // We're only interested in the case where we know something about
- // the numerator.
- if (UI.getOperandValToReplace() != Rem->getOperand(0))
- continue;
+protected:
+ Instruction *CloneIVUser(Instruction *NarrowUse,
+ Instruction *NarrowDef,
+ Instruction *WideDef);
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(Rem->getOperand(0));
- const SCEV *X = SE->getSCEV(Rem->getOperand(1));
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // i % n --> i if i is in [0,n).
- if ((!isSigned || SE->isKnownNonNegative(S)) &&
- SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- S, X))
- Rem->replaceAllUsesWith(Rem->getOperand(0));
- else {
- // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
- const SCEV *LessOne =
- SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
- if ((!isSigned || SE->isKnownNonNegative(LessOne)) &&
- SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- LessOne, X)) {
- ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
- Rem->getOperand(0), Rem->getOperand(1),
- "tmp");
- SelectInst *Sel =
- SelectInst::Create(ICmp,
- ConstantInt::get(Rem->getType(), 0),
- Rem->getOperand(0), "tmp", Rem);
- Rem->replaceAllUsesWith(Sel);
- } else
+ const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+
+ Instruction *WidenIVUse(Instruction *NarrowUse,
+ Instruction *NarrowDef,
+ Instruction *WideDef);
+};
+} // anonymous namespace
+
+/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
+/// loop. IVUsers is treated as a worklist. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+ WideIVMap IVMap;
+
+ // Each round of simplification involves a round of eliminating operations
+ // followed by a round of widening IVs. A single IVUsers worklist is used
+ // across all rounds. The inner loop advances the user. If widening exposes
+ // more uses, then another pass through the outer loop is triggered.
+ for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) {
+ for(; I != E; ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (DisableIVRewrite) {
+ if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+ CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD);
+ continue;
+ }
+ }
+ }
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi());
+ continue;
+ }
+ }
+ }
+ for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end();
+ I != E; ++I) {
+ WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts);
+ if (Widener.CreateWideIV(Rewriter))
+ Changed = true;
}
+ }
+}
- // Inform IVUsers about the new users.
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I);
+static Value *getExtend( Value *NarrowOper, const Type *WideType,
+ bool IsSigned, IRBuilder<> &Builder) {
+ return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+ Builder.CreateZExt(NarrowOper, WideType);
+}
- DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
- DeadInsts.push_back(Rem);
+/// CloneIVUser - Instantiate a wide operation to replace a narrow
+/// operation. This only needs to handle operations that can evaluation to
+/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
+Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
+ Instruction *NarrowDef,
+ Instruction *WideDef) {
+ unsigned Opcode = NarrowUse->getOpcode();
+ switch (Opcode) {
+ default:
+ return 0;
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n");
+
+ IRBuilder<> Builder(NarrowUse);
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know
+ // anything about the narrow operand yet so must insert a [sz]ext. It is
+ // probably loop invariant and will be folded or hoisted. If it actually
+ // comes from a widened IV, it should be removed during a future call to
+ // WidenIVUse.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
+ getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef :
+ getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder);
+
+ BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
+ LHS, RHS,
+ NarrowBO->getName());
+ Builder.Insert(WideBO);
+ if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+ if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+
+ return WideBO;
}
+ llvm_unreachable(0);
+}
+
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening it's type? In other words, can the extend be
+// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+// on the same loop. If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+
+ return AddRec;
+}
+
+/// HoistStep - Attempt to hoist an IV increment above a potential use.
+///
+/// To successfully hoist, two criteria must be met:
+/// - IncV operands dominate InsertPos and
+/// - InsertPos dominates IncV
+///
+/// Meeting the second condition means that we don't need to check all of IncV's
+/// existing uses (it's moving up in the domtree).
+///
+/// This does not yet recursively hoist the operands, although that would
+/// not be difficult.
+static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
+ const DominatorTree *DT)
+{
+ if (DT->dominates(IncV, InsertPos))
+ return true;
+
+ if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ // Attempt to hoist IncV
+ for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
+ OI != OE; ++OI) {
+ Instruction *OInst = dyn_cast<Instruction>(OI);
+ if (OInst && !DT->dominates(OInst, InsertPos))
+ return false;
+ }
+ IncV->moveBefore(InsertPos);
+ return true;
+}
+
+/// WidenIVUse - Determine whether an individual user of the narrow IV can be
+/// widened. If so, return the wide clone of the user.
+Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
+ Instruction *NarrowDef,
+ Instruction *WideDef) {
+ // To be consistent with IVUsers, stop traversing the def-use chain at
+ // inner-loop phis or post-loop phis.
+ if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
+ return 0;
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Processed.insert(NarrowUse))
+ return 0;
+
+ // Our raison d'etre! Eliminate sign and zero extension.
+ if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) {
+ Value *NewDef = WideDef;
+ if (NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ if (CastWidth < IVWidth) {
+ // The cast isn't as wide as the IV, so insert a Trunc.
+ IRBuilder<> Builder(NarrowUse);
+ NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+ }
+ else {
+ // A wider extend was hidden behind a narrower one. This may induce
+ // another round of IV widening in which the intermediate IV becomes
+ // dead. It should be very rare.
+ DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+ << " not wide enough to subsume " << *NarrowUse << "\n");
+ NarrowUse->replaceUsesOfWith(NarrowDef, WideDef);
+ NewDef = NarrowUse;
+ }
+ }
+ if (NewDef != NarrowUse) {
+ DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse
+ << " replaced by " << *WideDef << "\n");
+ ++NumElimExt;
+ NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.push_back(NarrowUse);
+ }
+ // Now that the extend is gone, expose it's uses to IVUsers for potential
+ // further simplification within SimplifyIVUsers.
+ IU->AddUsersIfInteresting(WideDef, WidePhi);
+
+ // No further widening is needed. The deceased [sz]ext had done it for us.
+ return 0;
+ }
+ const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
+ if (!WideAddRec) {
+ // This user does not evaluate to a recurence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ IRBuilder<> Builder(NarrowUse);
+ Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
+ NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
+ return 0;
+ }
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = 0;
+ if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) {
+ WideUse = WideInc;
+ }
+ else {
+ WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef);
+ if (!WideUse)
+ return 0;
+ }
+ // GetWideRecurrence ensured that the narrow expression could be extended
+ // outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simple throw away the newly created wide use.
+ if (WideAddRec != SE->getSCEV(WideUse)) {
+ DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
+ << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
+ DeadInsts.push_back(WideUse);
+ return 0;
+ }
+
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+}
+
+/// CreateWideIV - Process a single induction variable. First use the
+/// SCEVExpander to create a wide induction variable that evaluates to the same
+/// recurrence as the original narrow IV. Then use a worklist to forward
+/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all
+/// interesting IV users, the narrow IV will be isolated for removal by
+/// DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+///
+bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+ // Is this phi an induction variable?
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+ if (!AddRec)
+ return false;
+
+ // Widen the induction variable expression.
+ const SCEV *WideIVExpr = IsSigned ?
+ SE->getSignExtendExpr(AddRec, WideType) :
+ SE->getZeroExtendExpr(AddRec, WideType);
+
+ assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+ "Expect the new IV expression to preserve its type");
+
+ // Can the IV be extended outside the loop without overflow?
+ AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return false;
+
+ // An AddRec must have loop-invariant operands. Since this AddRec it
+ // materialized by a loop header phi, the expression cannot have any post-loop
+ // operands, so they must dominate the loop header.
+ assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
+ && "Loop header phi recurrence inputs do not dominate the loop");
+
+ // The rewriter provides a value for the desired IV expression. This may
+ // either find an existing phi or materialize a new one. Either way, we
+ // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
+ // of the phi-SCC dominates the loop entry.
+ Instruction *InsertPt = L->getHeader()->begin();
+ WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
+
+ // Remembering the WideIV increment generated by SCEVExpander allows
+ // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // employ a general reuse mechanism because the call above is the only call to
+ // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ WideInc =
+ cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ WideIncExpr = SE->getSCEV(WideInc);
+ }
+
+ DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+ ++NumWidened;
+
+ // Traverse the def-use chain using a worklist starting at the original IV.
+ assert(Processed.empty() && "expect initial state" );
+
+ // Each worklist entry has a Narrow def-use link and Wide def.
+ SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
+ for (Value::use_iterator UI = OrigPhi->use_begin(),
+ UE = OrigPhi->use_end(); UI != UE; ++UI) {
+ NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi));
+ }
+ while (!NarrowIVUsers.empty()) {
+ Use *NarrowDefUse;
+ Instruction *WideDef;
+ tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val();
+
+ // Process a def-use edge. This may replace the use, so don't hold a
+ // use_iterator across it.
+ Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get());
+ Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser());
+ Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef);
+
+ // Follow all def-use edges from the previous narrow use.
+ if (WideUse) {
+ for (Value::use_iterator UI = NarrowUse->use_begin(),
+ UE = NarrowUse->use_end(); UI != UE; ++UI) {
+ NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse));
+ }
+ }
+ // WidenIVUse may have removed the def-use edge.
+ if (NarrowDef->use_empty())
+ DeadInsts.push_back(NarrowDef);
+ }
+ return true;
+}
+
+void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ else
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ ++NumElimCmp;
+ Changed = true;
+ DeadInsts.push_back(ICmp);
+}
+
+void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned,
+ PHINode *IVPhi) {
+ // We're only interested in the case where we know something about
+ // the numerator.
+ if (IVOperand != Rem->getOperand(0))
+ return;
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n).
+ if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+ const SCEV *LessOne =
+ SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ if (IsSigned && !SE->isKnownNonNegative(LessOne))
+ return;
+
+ if (!SE->isKnownPredicate(IsSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X))
+ return;
+
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1),
+ "tmp");
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ }
+
+ // Inform IVUsers about the new users.
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I, IVPhi);
+
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.push_back(Rem);
}
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -526,6 +1014,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
DeadInsts.clear();
Changed = false;
@@ -533,11 +1023,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// transform them to use integer recurrences.
RewriteNonIntegerIVs(L);
- BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE);
+ if (DisableIVRewrite)
+ Rewriter.disableCanonicalMode();
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -548,33 +1039,42 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
RewriteLoopExitValues(L, Rewriter);
- // Simplify ICmp IV users.
- EliminateIVComparisons();
-
- // Simplify SRem and URem IV users.
- EliminateIVRemainders();
+ // Eliminate redundant IV users.
+ SimplifyIVUsers(Rewriter);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
const Type *LargestType = 0;
bool NeedCannIV = false;
- if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
- LargestType = BackedgeTakenCount->getType();
- LargestType = SE->getEffectiveSCEVType(LargestType);
+ bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
+ if (ExpandBECount) {
// If we have a known trip count and a single exit block, we'll be
// rewriting the loop exit test condition below, which requires a
// canonical induction variable.
- if (ExitingBlock)
- NeedCannIV = true;
- }
- for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
- const Type *Ty =
- SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
+ NeedCannIV = true;
+ const Type *Ty = BackedgeTakenCount->getType();
+ if (DisableIVRewrite) {
+ // In this mode, SimplifyIVUsers may have already widened the IV used by
+ // the backedge test and inserted a Trunc on the compare's operand. Get
+ // the wider type to avoid creating a redundant narrow IV only used by the
+ // loop test.
+ LargestType = getBackedgeIVType(L);
+ }
if (!LargestType ||
SE->getTypeSizeInBits(Ty) >
+ SE->getTypeSizeInBits(LargestType))
+ LargestType = SE->getEffectiveSCEVType(Ty);
+ }
+ if (!DisableIVRewrite) {
+ for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ NeedCannIV = true;
+ const Type *Ty =
+ SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
+ if (!LargestType ||
+ SE->getTypeSizeInBits(Ty) >
SE->getTypeSizeInBits(LargestType))
- LargestType = Ty;
- NeedCannIV = true;
+ LargestType = Ty;
+ }
}
// Now that we know the largest of the induction variable expressions
@@ -614,19 +1114,17 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
ICmpInst *NewICmp = 0;
- if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
- !BackedgeTakenCount->isZero() &&
- ExitingBlock) {
+ if (ExpandBECount) {
+ assert(canExpandBackedgeTakenCount(L, SE) &&
+ "canonical IV disrupted BackedgeTaken expansion");
assert(NeedCannIV &&
"LinearFunctionTestReplace requires a canonical induction variable");
- // Can't rewrite non-branch yet.
- if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
- NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
- ExitingBlock, BI, Rewriter);
+ NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Rewriter);
}
-
// Rewrite IV-derived expressions.
- RewriteIVExpressions(L, Rewriter);
+ if (!DisableIVRewrite)
+ RewriteIVExpressions(L, Rewriter);
// Clear the rewriter cache, because values that are in the rewriter's cache
// can be deleted in the loop below, causing the AssertingVH in the cache to
@@ -649,7 +1147,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// For completeness, inform IVUsers of the IV use in the newly-created
// loop exit test instruction.
if (NewICmp)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)),
+ IndVar);
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -1080,5 +1579,5 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
}
// Add a new IVUsers entry for the newly-created integer PHI.
- IU->AddUsersIfInteresting(NewPHI);
+ IU->AddUsersIfInteresting(NewPHI, NewPHI);
}
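
A worked example may help when reading the WidenIV machinery above; it is not part of the patch. On a 64-bit target the 32-bit counter below is sign-extended on every array access; widening the induction variable to the legal 64-bit type (what CreateWideIV/WidenIVUse do) removes every per-iteration sext, which is what the new NumElimExt statistic counts.

    // Hypothetical input: address a[i] needs sext(i32 -> i64) each iteration.
    long sum(const long *a, int n) {
      long s = 0;
      for (int i = 0; i != n; ++i)
        s += a[i];
      return s;
    }

    // Conceptually, after widening, the IV itself is 64 bits wide and no
    // extension remains inside the loop:
    long sum_widened(const long *a, int n) {
      long s = 0;
      for (long i = 0, e = n; i != e; ++i)
        s += a[i];
      return s;
    }
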
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 7168177..cf18ff0 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -706,7 +706,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding terminator: " << *BB->getTerminator() << '\n');
++NumFolds;
- ConstantFoldTerminator(BB);
+ ConstantFoldTerminator(BB, true);
return true;
}
@@ -929,9 +929,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
LI->getAlignment(),
UnavailablePred->getTerminator());
+ NewVal->setDebugLoc(LI->getDebugLoc());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
@@ -944,6 +945,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
LoadBB->begin());
PN->takeName(LI);
+ PN->setDebugLoc(LI->getDebugLoc());
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
@@ -1375,7 +1377,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
- BranchInst::Create(SuccBB, NewBB);
+ BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
+ NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 93de9cf..13bd022 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -372,7 +372,11 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
LI->getMetadata(LLVMContext::MD_tbaa));
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- // Handle obvious cases efficiently.
+ // Don't sink or hoist dbg info; it's legal, but not useful.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Handle simple cases by querying alias analysis.
AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == AliasAnalysis::DoesNotAccessMemory)
return true;
@@ -445,8 +449,7 @@ void LICM::sink(Instruction &I) {
// enough that we handle it as a special (more efficient) case. It is more
// efficient to handle because there are no PHI nodes that need to be placed.
if (ExitBlocks.size() == 1) {
- if (!isa<DbgInfoIntrinsic>(I) &&
- !DT->dominates(I.getParent(), ExitBlocks[0])) {
+ if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
// Instruction is not used, just delete it.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
@@ -602,13 +605,15 @@ namespace {
SmallPtrSet<Value*, 4> &PointerMustAliases;
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
AliasSetTracker &AST;
+ DebugLoc DL;
public:
LoopPromoter(Value *SP,
const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
SmallPtrSet<Value*, 4> &PMA,
- SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast)
- : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
- LoopExitBlocks(LEB), AST(ast) {}
+ SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
+ DebugLoc dl)
+ : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP),
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {}
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
@@ -629,7 +634,8 @@ namespace {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
Instruction *InsertPos = ExitBlock->getFirstNonPHI();
- new StoreInst(LiveInValue, SomePtr, InsertPos);
+ StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ NewSI->setDebugLoc(DL);
}
}
@@ -727,6 +733,12 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
Changed = true;
++NumPromoted;
+ // Grab a debug location for the inserted loads/stores; given that the
+ // inserted loads/stores have little relation to the original loads/stores,
+ // this code just arbitrarily picks a location from one, since any debug
+ // location is better than none.
+ DebugLoc DL = LoopUses[0]->getDebugLoc();
+
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
@@ -734,13 +746,14 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- *CurAST);
+ *CurAST, DL);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad =
new LoadInst(SomePtr, SomePtr->getName()+".promoted",
Preheader->getTerminator());
+ PreheaderLoad->setDebugLoc(DL);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
// Rewrite all the loads in the loop and remember all the definitions from
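
As context for why LICM is creating loads and stores that need a DebugLoc at all (a hypothetical example, not from the patch): scalar promotion rewrites every in-loop access to the promoted pointer into SSA values, leaving one load in the preheader and one store per exit block, and those synthesized instructions are the ones that now borrow a location from LoopUses[0].

    // Hypothetical loop that PromoteAliasSet can handle, assuming alias
    // analysis proves nothing else touches *p inside the loop.
    void accumulate(int *p, int n) {
      for (int i = 0; i < n; ++i)
        *p += i;            // load and store of *p every iteration
    }

    // Conceptually after promotion:
    void accumulate_promoted(int *p, int n) {
      int t = *p;           // preheader load  (gets the borrowed DebugLoc)
      for (int i = 0; i < n; ++i)
        t += i;
      *p = t;               // exit-block store (same DebugLoc)
    }
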
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 1366231..dbf6eec 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -128,11 +128,11 @@ INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
-/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// deleteDeadInstruction - Delete this instruction. Before we do, go through
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
-static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -162,6 +162,14 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
} while (!NowDeadInsts.empty());
}
+/// deleteIfDeadInstruction - If the specified value is a dead instruction,
+/// delete it and any recursively used instructions.
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (isInstructionTriviallyDead(I))
+ deleteDeadInstruction(I, SE);
+}
+
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
@@ -454,31 +462,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
return false;
}
-
- // Okay, we have a strided store "p[i]" of a splattable value. We can turn
- // this into a memset in the loop preheader now if we want. However, this
- // would be unsafe to do if there is anything else in the loop that may read
- // or write to the aliased location. Check for an alias.
- if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
- CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
- return false;
-
- // Okay, everything looks good, insert the memset.
- BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
- IRBuilder<> Builder(Preheader->getTerminator());
-
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
- // header. Just insert code for it in the preheader.
+ // header. This allows us to insert code for it in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE);
-
+
+ // Okay, we have a strided store "p[i]" of a splattable value. We can turn
+ // this into a memset in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the aliased location. Check for any overlap by generating the
+ // base pointer and checking the region.
unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
Value *BasePtr =
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
+ CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(BasePtr, *SE);
+ return false;
+ }
+
+ // Okay, everything looks good, insert the memset.
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
@@ -521,7 +533,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- DeleteDeadInstruction(TheStore, *SE);
+ deleteDeadInstruction(TheStore, *SE);
++NumMemSet;
return true;
}
@@ -539,41 +551,51 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+ // The trip count of the loop and the base pointer of the addrec SCEV is
+ // guaranteed to be loop invariant, which means that it should dominate the
+ // header. This allows us to insert code for it in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE);
+
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
- // or write to the stored location (including the load feeding the stores).
- // Check for an alias.
- if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+ // or write the memory region we're storing to. This includes the load that
+ // feeds the stores. Check for an alias by generating the base address and
+ // checking everything.
+ Value *StoreBasePtr =
+ Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
CurLoop, BECount, StoreSize,
- getAnalysis<AliasAnalysis>(), SI))
+ getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(StoreBasePtr, *SE);
return false;
+ }
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
- if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
- CurLoop, BECount, StoreSize,
- getAnalysis<AliasAnalysis>(), SI))
- return false;
-
- // Okay, everything looks good, insert the memcpy.
- BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
- IRBuilder<> Builder(Preheader->getTerminator());
-
- // The trip count of the loop and the base pointer of the addrec SCEV is
- // guaranteed to be loop invariant, which means that it should dominate the
- // header. Just insert code for it in the preheader.
- SCEVExpander Expander(*SE);
-
Value *LoadBasePtr =
Expander.expandCodeFor(LoadEv->getStart(),
Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
Preheader->getTerminator());
- Value *StoreBasePtr =
- Expander.expandCodeFor(StoreEv->getStart(),
- Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
- Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(LoadBasePtr, *SE);
+ deleteIfDeadInstruction(StoreBasePtr, *SE);
+ return false;
+ }
+
+ // Okay, everything is safe, we can transform this!
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
@@ -589,18 +611,19 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
- Value *NewCall =
+ CallInst *NewCall =
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
std::min(SI->getAlignment(), LI->getAlignment()));
+ NewCall->setDebugLoc(SI->getDebugLoc());
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
- (void)NewCall;
+
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- DeleteDeadInstruction(SI, *SE);
+ deleteDeadInstruction(SI, *SE);
++NumMemCpy;
return true;
}
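
To make the alias-check reordering above concrete, these are the kinds of loops the pass rewrites (hypothetical examples, not from the patch). The base pointers are now expanded in the preheader first so that mayLoopAccessLocation() can check the exact region; if the check fails, deleteIfDeadInstruction() removes the freshly expanded pointers instead of leaving dead code behind.

    // Strided store of a splattable value: becomes memset(p, 0, n).
    void zero(char *p, unsigned n) {
      for (unsigned i = 0; i != n; ++i)
        p[i] = 0;
    }

    // Store of a loaded value with matching strides: becomes memcpy(d, s, n),
    // provided the source region is not written inside the loop.
    void copy(char *d, const char *s, unsigned n) {
      for (unsigned i = 0; i != n; ++i)
        d[i] = s[i];
    }
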
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5abc790..73ebd61 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -209,7 +209,12 @@ struct Formula {
/// when AM.Scale is not zero.
const SCEV *ScaledReg;
- Formula() : ScaledReg(0) {}
+ /// UnfoldedOffset - An additional constant offset which added near the
+ /// use. This requires a temporary register, but the offset itself can
+ /// live in an add immediate field rather than a register.
+ int64_t UnfoldedOffset;
+
+ Formula() : ScaledReg(0), UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
@@ -379,6 +384,10 @@ void Formula::print(raw_ostream &OS) const {
OS << "<unknown>";
OS << ')';
}
+ if (UnfoldedOffset != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << "imm(" << UnfoldedOffset << ')';
+ }
}
void Formula::dump() const {
@@ -771,8 +780,10 @@ void Cost::RateFormula(const Formula &F,
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
}
- if (F.BaseRegs.size() > 1)
- NumBaseAdds += F.BaseRegs.size() - 1;
+ // Determine how many (unfolded) adds we'll need inside the loop.
+ size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ if (NumBaseParts > 1)
+ NumBaseAdds += NumBaseParts - 1;
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
@@ -1793,7 +1804,8 @@ LSRInstance::OptimizeLoopTermCond() {
ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
- CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
+ CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(),
+ CondUse->getPhi());
TermBr->replaceUsesOfWith(OldCond, Cond);
}
}
@@ -1945,7 +1957,8 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale) {
+ F.AM.Scale == OrigF.AM.Scale &&
+ F.UnfoldedOffset == OrigF.UnfoldedOffset) {
if (F.AM.BaseOffs == 0)
return &LU;
// This is the formula where all the registers and symbols matched;
@@ -2061,6 +2074,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
if (SE.isLoopInvariant(N, L)) {
+ // S is normalized, so normalize N before folding it into S
+ // to keep the result normalized.
+ N = TransformForPostIncUse(Normalize, N, CI, 0,
+ LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
@@ -2313,8 +2330,29 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
if (InnerSum->isZero())
continue;
Formula F = Base;
- F.BaseRegs[i] = InnerSum;
- F.BaseRegs.push_back(*J);
+
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (TLI && InnerSumSC &&
+ SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue();
+ F.BaseRegs.erase(F.BaseRegs.begin() + i);
+ } else
+ F.BaseRegs[i] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
// it.
@@ -2482,6 +2520,15 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
continue;
}
+ // Check that multiplying with the unfolded offset doesn't overflow.
+ if (F.UnfoldedOffset != 0) {
+ if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
+ continue;
+ F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
+ if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
+ continue;
+ }
+
// If we make it here and it's legal, add it.
(void)InsertFormula(LU, LUIdx, F);
next:;
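
Scaling the unfolded offset by Factor can overflow int64_t, so the hunk above multiplies with unsigned wraparound and then divides back, rejecting the formula when the round trip does not recover the original value; INT64_MIN * -1 is excluded up front because the division itself would trap on it. The same guard, written as a small self-contained function (mulWouldOverflow is a name introduced here; the real code does the check inline):

    #include <cstdint>
    #include <iostream>

    // Returns true when A * Factor cannot be represented in int64_t: rule out
    // INT64_MIN * -1 explicitly, multiply with unsigned wraparound, and verify
    // that dividing back recovers the original operand.
    static bool mulWouldOverflow(int64_t A, int64_t Factor) {
      if (A == INT64_MIN && Factor == -1)
        return true;
      int64_t Product = (int64_t)((uint64_t)A * (uint64_t)Factor);
      return Factor != 0 && Product / Factor != A;
    }

    int main() {
      std::cout << mulWouldOverflow(1000, 1000) << '\n';       // 0
      std::cout << mulWouldOverflow(INT64_MAX / 2, 3) << '\n'; // 1
      return 0;
    }
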
@@ -2664,7 +2711,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// other orig regs.
ImmMapTy::const_iterator OtherImms[] = {
Imms.begin(), prior(Imms.end()),
- Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+ Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
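
Switching from upper_bound to lower_bound changes which immediate gets sampled around the midpoint of the sorted map: lower_bound returns the first key greater than or equal to the average, so an immediate sitting exactly on the midpoint is now considered instead of skipped. A small sketch of the difference on a std::map keyed by immediates (the keys below are made up):

    #include <cstdint>
    #include <iostream>
    #include <map>

    int main() {
      // Imms maps an immediate offset to some per-use payload (elided here).
      std::map<int64_t, int> Imms = {{0, 0}, {8, 0}, {16, 0}};
      int64_t Mid = (Imms.begin()->first + Imms.rbegin()->first) / 2; // 8

      // lower_bound(8) lands on the 8 entry itself; upper_bound(8) would skip
      // ahead to 16, ignoring an immediate that sits exactly at the midpoint.
      std::cout << Imms.lower_bound(Mid)->first << ' '
                << Imms.upper_bound(Mid)->first << '\n'; // prints: 8 16
      return 0;
    }
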
@@ -2738,8 +2785,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
- continue;
+ LU.Kind, LU.AccessTy, TLI)) {
+ if (!TLI ||
+ !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ continue;
+ NewF = F;
+ NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
+ }
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
// If the new formula has a constant in a register, and adding the
@@ -3488,6 +3540,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
}
+ // Expand the unfolded offset portion.
+ int64_t UnfoldedOffset = F.UnfoldedOffset;
+ if (UnfoldedOffset != 0) {
+ // Just add the immediate values.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
+ UnfoldedOffset)));
+ }
+
// Emit instructions summing all the operands.
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
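
At expansion time a non-zero UnfoldedOffset is simply appended as one more operand of the final sum, so it ends up as an add of an immediate next to the use rather than as a separately materialized register. A rough, purely illustrative sketch of that last summing step; the string operands stand in for the SCEV terms handed to the expander:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Operands gathered for one use: two register terms plus the unfolded
      // immediate appended last, as in the hunk above.
      std::vector<std::string> Ops = {"%scaled.iv", "%base"};
      int64_t UnfoldedOffset = 24;
      if (UnfoldedOffset != 0)
        Ops.push_back(std::to_string(UnfoldedOffset));

      // "Expansion" here is just pretty-printing the running sum; the real
      // pass hands the operand list to the SCEV expander instead.
      std::string Sum = Ops.front();
      for (size_t i = 1; i < Ops.size(); ++i)
        Sum = "(" + Sum + " + " + Ops[i] + ")";
      std::cout << Sum << '\n'; // ((%scaled.iv + %base) + 24)
      return 0;
    }
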
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index b4e3d31..e05f29c 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -258,6 +258,7 @@ bool LoopUnswitch::processCurrentLoop() {
if (LoopCond && SI->getNumCases() > 1) {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
+ // FIXME: scan for a case with a non-critical edge?
Constant *UnswitchVal = SI->getCaseValue(1);
// Do not process same value again and again.
if (!UnswitchedVals.insert(UnswitchVal))
@@ -560,6 +561,8 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
BasicBlock *ExitBlock = ExitBlocks[i];
SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
pred_end(ExitBlock));
+ // Although SplitBlockPredecessors doesn't preserve loop-simplify in
+ // general, if we call it on all predecessors of all exits then it does.
SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
".us-lcssa", this);
}
@@ -786,8 +789,13 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
// If this is the edge to the header block for a loop, remove the loop and
// promote all subloops.
if (Loop *BBLoop = LI->getLoopFor(BB)) {
- if (BBLoop->getLoopLatch() == BB)
+ if (BBLoop->getLoopLatch() == BB) {
RemoveLoopFromHierarchy(BBLoop);
+ if (currentLoop == BBLoop) {
+ currentLoop = 0;
+ redoLoop = false;
+ }
+ }
}
// Remove the block from the loop info, which removes it from any loops it
@@ -859,7 +867,6 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
// selects, switches.
- std::vector<User*> Users(LIC->use_begin(), LIC->use_end());
std::vector<Instruction*> Worklist;
LLVMContext &Context = Val->getContext();
@@ -875,13 +882,14 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
!cast<ConstantInt>(Val)->getZExtValue());
- for (unsigned i = 0, e = Users.size(); i != e; ++i)
- if (Instruction *U = cast<Instruction>(Users[i])) {
- if (!L->contains(U))
- continue;
- U->replaceUsesOfWith(LIC, Replacement);
- Worklist.push_back(U);
- }
+ for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
+ UI != E; ++UI) {
+ Instruction *U = dyn_cast<Instruction>(*UI);
+ if (!U || !L->contains(U))
+ continue;
+ U->replaceUsesOfWith(LIC, Replacement);
+ Worklist.push_back(U);
+ }
SimplifyCode(Worklist, L);
return;
}
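
Besides iterating the use list directly instead of snapshotting it into a Users vector, the rewrite above replaces cast<Instruction> with dyn_cast: a use of LIC is not guaranteed to be an Instruction (it could be a constant expression, for example), and cast<> would assert on such a user. A tiny standalone analogue of that distinction using a toy class hierarchy; Value and Instruction here are placeholders, not the LLVM types:

    #include <iostream>
    #include <memory>
    #include <vector>

    struct Value { virtual ~Value() = default; };
    struct Instruction : Value {};
    struct ConstantUser : Value {};  // stands in for a non-instruction user

    int main() {
      std::vector<std::unique_ptr<Value>> Users;
      Users.emplace_back(new Instruction());
      Users.emplace_back(new ConstantUser());

      int Kept = 0;
      for (const auto &U : Users) {
        // dynamic_cast plays the role of dyn_cast: it yields null for the
        // non-instruction user instead of asserting, so we can just skip it.
        if (auto *I = dynamic_cast<Instruction *>(U.get())) {
          (void)I;
          ++Kept;
        }
      }
      std::cout << Kept << '\n'; // 1
      return 0;
    }
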
@@ -889,9 +897,10 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// Otherwise, we don't know the precise value of LIC, but we do know that it
// is certainly NOT "Val". As such, simplify any uses in the loop that we
// can. This case occurs when we unswitch switch statements.
- for (unsigned i = 0, e = Users.size(); i != e; ++i) {
- Instruction *U = cast<Instruction>(Users[i]);
- if (!L->contains(U))
+ for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
+ UI != E; ++UI) {
+ Instruction *U = dyn_cast<Instruction>(*UI);
+ if (!U || !L->contains(U))
continue;
Worklist.push_back(U);
@@ -909,13 +918,22 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// Found a dead case value. Don't remove PHI nodes in the
// successor if they become single-entry, those PHI nodes may
// be in the Users list.
-
+
+ BasicBlock *Switch = SI->getParent();
+ BasicBlock *SISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
+ // If the DeadCase successor dominates the loop latch, then the
+ // transformation isn't safe since it will delete the sole predecessor edge
+ // to the latch.
+ if (Latch && DT->dominates(SISucc, Latch))
+ continue;
+
// FIXME: This is a hack. We need to keep the successor around
// and hooked up so as to preserve the loop structure, because
// trying to update it is complicated. So instead we preserve the
// loop structure and put the block on a dead code path.
- BasicBlock *Switch = SI->getParent();
- SplitEdge(Switch, SI->getSuccessor(DeadCase), this);
+ SplitEdge(Switch, SISucc, this);
// Compute the successors instead of relying on the return value
// of SplitEdge, since it may have split the switch successor
// after PHI nodes.
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a3035cb..be5aa2e 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
@@ -459,7 +460,10 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
dbgs() << *Range.TheStores[i] << '\n';
dbgs() << "With: " << *AMemSet << '\n');
-
+
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+
// Zap all the stores.
for (SmallVector<Instruction*, 16>::const_iterator
SI = Range.TheStores.begin(),
@@ -484,11 +488,28 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
if (!LI->isVolatile() && LI->hasOneUse()) {
- MemDepResult dep = MD->getDependency(LI);
+ MemDepResult ldep = MD->getDependency(LI);
CallInst *C = 0;
- if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
- C = dyn_cast<CallInst>(dep.getInst());
-
+ if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
+ C = dyn_cast<CallInst>(ldep.getInst());
+
+ if (C) {
+ // Check that nothing touches the dest of the "copy" between
+ // the call and the store.
+ MemDepResult sdep = MD->getDependency(SI);
+ if (!sdep.isNonLocal()) {
+ bool FoundCall = false;
+ for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) {
+ if (&*I == C) {
+ FoundCall = true;
+ break;
+ }
+ }
+ if (!FoundCall)
+ C = 0;
+ }
+ }
+
if (C) {
bool changed = performCallSlotOptzn(LI,
SI->getPointerOperand()->stripPointerCasts(),
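
The candidate call C is only kept if it actually sits between the store's closest memory dependency and the store itself, which the new block checks by walking the basic block backwards from the store. A small standalone sketch of that backward scan over a toy instruction sequence (plain strings here); FoundCall mirrors the flag in the hunk:

    #include <iostream>
    #include <iterator>
    #include <list>
    #include <string>

    int main() {
      // A toy basic block, oldest instruction first.
      std::list<std::string> BB = {"call", "other", "clobber", "store"};

      auto SI  = std::prev(BB.end());        // the store
      auto Dep = std::next(BB.begin(), 2);   // its closest dependency ("clobber")

      // Walk backwards from the store to the dependency; if the call is never
      // met, it is not in between and the call-slot optimization is skipped.
      bool FoundCall = false;
      for (auto I = SI; I != Dep; --I)
        if (*I == "call") { FoundCall = true; break; }

      std::cout << (FoundCall ? "call is in range" : "call not in range") << '\n';
      return 0;
    }
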
@@ -863,12 +884,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
return false;
- // Get the alignment of the byval. If it is greater than the memcpy, then we
- // can't do the substitution. If the call doesn't specify the alignment, then
- // it is some target specific value that we can't know.
+ // Get the alignment of the byval. If the call doesn't specify the alignment,
+ // then it is some target specific value that we can't know.
unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
- if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
- return false;
+ if (ByValAlign == 0) return false;
+
+ // If it is greater than the memcpy, then we check to see if we can force the
+ // source of the memcpy to the alignment we need. If we fail, we bail out.
+ if (MDep->getAlignment() < ByValAlign &&
+ getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
+ return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
// the byval call.
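
Rather than bailing out whenever the byval argument wants more alignment than the memcpy source provides, the code now asks getOrEnforceKnownAlignment whether the source's alignment can be proven or raised to the required value, for example by over-aligning an alloca it controls. A rough sketch of that check-then-enforce shape; Source and tryRaiseAlignment are assumptions, not the LLVM utility:

    #include <cstdint>
    #include <iostream>

    struct Source {
      uint64_t KnownAlign;  // alignment we can currently prove
      bool OwnedLocally;    // e.g. an alloca we are free to over-align
    };

    // Stand-in for getOrEnforceKnownAlignment: report the provable alignment,
    // bumping it up when we control the underlying object.
    static uint64_t tryRaiseAlignment(Source &S, uint64_t Want) {
      if (S.KnownAlign < Want && S.OwnedLocally)
        S.KnownAlign = Want;
      return S.KnownAlign;
    }

    int main() {
      Source Src = {4, true};
      uint64_t ByValAlign = 16;
      bool OK = Src.KnownAlign >= ByValAlign ||
                tryRaiseAlignment(Src, ByValAlign) >= ByValAlign;
      std::cout << (OK ? "forward the byval" : "bail out") << '\n';
      return 0;
    }
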
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index db8eb85..083412e 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -655,7 +655,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// Just mark all destinations executable!
// TODO: This could be improved if the operand is a [cast of a] BlockAddress.
- if (isa<IndirectBrInst>(&TI))
+ if (isa<IndirectBrInst>(TI))
return true;
#ifndef NDEBUG
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 8178c27..8938b28 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -30,6 +30,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -341,7 +342,8 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
- if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) {
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
+ (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
if (!VectorTy) {
VectorTy = VectorType::get(In, AllocaSize/EltSize);
return;
@@ -741,8 +743,9 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
- if (ToTypeSize == AllocaSize) {
+ if (FromTypeSize == ToTypeSize) {
// If the two types have the same primitive size, use a bit cast.
// Otherwise, it is two vectors with the same element type that has
// the same allocation size but different number of elements so use
@@ -754,13 +757,13 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
return CreateShuffleVectorCast(FromVal, ToType, Builder);
}
- if (isPowerOf2_64(AllocaSize / ToTypeSize)) {
+ if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
"of a smaller vector type at a nonzero offset.");
const Type *CastElementTy = getScaledElementType(FromType, ToType,
ToTypeSize * 8);
- unsigned NumCastVectorElements = AllocaSize / ToTypeSize;
+ unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;
LLVMContext &Context = FromVal->getContext();
const Type *CastTy = VectorType::get(CastElementTy,
@@ -1051,8 +1054,9 @@ namespace {
class AllocaPromoter : public LoadAndStorePromoter {
AllocaInst *AI;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
- : LoadAndStorePromoter(Insts, S), AI(0) {}
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ DbgDeclareInst *DD, DIBuilder *&DB)
+ : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {}
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
@@ -1329,7 +1333,6 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
return true;
}
-
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
DominatorTree *DT = 0;
@@ -1340,6 +1343,7 @@ bool SROA::performPromotion(Function &F) {
bool Changed = false;
SmallVector<Instruction*, 64> Insts;
+ DIBuilder *DIB = 0;
while (1) {
Allocas.clear();
@@ -1363,8 +1367,11 @@ bool SROA::performPromotion(Function &F) {
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
Insts.push_back(cast<Instruction>(*UI));
-
- AllocaPromoter(Insts, SSA).run(AI, Insts);
+
+ DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
+ if (DDI && !DIB)
+ DIB = new DIBuilder(*AI->getParent()->getParent()->getParent());
+ AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts);
Insts.clear();
}
}
@@ -1372,6 +1379,10 @@ bool SROA::performPromotion(Function &F) {
Changed = true;
}
+ // FIXME: Is there a better way to handle the lazy initialization of DIB
+ // so that there doesn't need to be an explicit delete?
+ delete DIB;
+
return Changed;
}
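
The DIBuilder is created lazily, only once the first alloca with an associated dbg.declare shows up, and is released with an explicit delete, which is what the FIXME above is unhappy about. One hedged way to keep the laziness without the manual delete is a smart pointer, sketched below with std::unique_ptr; ToyDIBuilder is a placeholder, and the LLVM tree of this vintage would more likely use its own owning-pointer class:

    #include <iostream>
    #include <memory>

    struct ToyDIBuilder {  // placeholder for llvm::DIBuilder
      ToyDIBuilder() { std::cout << "DIBuilder constructed\n"; }
    };

    int main() {
      std::unique_ptr<ToyDIBuilder> DIB;  // stays null until actually needed

      bool SawDbgDeclare[] = {false, true, true};
      for (bool HasDDI : SawDbgDeclare) {
        if (HasDDI && !DIB)
          DIB.reset(new ToyDIBuilder()); // constructed once, on first demand
        // ... promotion work would use DIB.get() here ...
      }
      return 0;  // unique_ptr cleans up, no explicit delete
    }
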
@@ -1831,9 +1842,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
// (Also works for arrays instead of structs)
Value *Insert = UndefValue::get(LIType);
+ IRBuilder<> Builder(LI);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
- Value *Load = new LoadInst(NewElts[i], "load", LI);
- Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
+ Value *Load = Builder.CreateLoad(NewElts[i], "load");
+ Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
}
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
@@ -1858,9 +1870,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// %val.1 = extractvalue { i32, i32 } %val, 1
// store i32 %val.1, i32* %alloc.1
// (Also works for arrays instead of structs)
+ IRBuilder<> Builder(SI);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
- Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
- new StoreInst(Extract, NewElts[i], SI);
+ Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
+ Builder.CreateStore(Extract, NewElts[i]);
}
DeadInsts.push_back(SI);
} else if (SIType->isIntegerTy() &&
@@ -2481,19 +2494,22 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
}
if (CallSite CS = U) {
- // If this is a readonly/readnone call site, then we know it is just a
- // load and we can ignore it.
- if (CS.onlyReadsMemory())
- continue;
-
// If this is the function being called then we treat it like a load and
// ignore it.
if (CS.isCallee(UI))
continue;
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() ||
+ CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
+ continue;
+
// If this is being passed as a byval argument, the caller is making a
// copy, so it is only a read of the alloca.
- unsigned ArgNo = CS.getArgumentNo(UI);
if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
continue;
}
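
The reordered checks above account for a subtlety: a readonly call writes no memory, but it may still hand its pointer argument back to the caller, so the alloca can escape through the return value unless the result is unused or the parameter is marked nocapture. A short illustration of that escape; identity is a made-up function playing the role of a readonly callee:

    #include <cstring>
    #include <iostream>

    // Reads nothing, writes nothing, but hands the pointer straight back:
    // the argument is "captured" through the return value.
    static char *identity(char *P) { return P; }

    int main() {
      char Buf[8] = "const";
      char *Alias = identity(Buf);    // Buf escapes via the return value
      std::strcpy(Alias, "mutated");  // a write through the alias changes Buf
      std::cout << Buf << '\n';       // prints "mutated"
      return 0;
    }
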
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 1137c2b..7e9cc80 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -96,6 +96,7 @@ static void ChangeToCall(InvokeInst *II) {
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
II->replaceAllUsesWith(NewCall);
// Follow the call by a branch to the normal destination.
@@ -163,7 +164,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
Changed = true;
}
- Changed |= ConstantFoldTerminator(BB);
+ Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
Worklist.push_back(*SI);
} while (!Worklist.empty());
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 539cc6f..e21eb9d 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -59,6 +59,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
@@ -209,10 +210,10 @@ bool TailCallElim::runOnFunction(Function &F) {
}
}
- // Finally, if this function contains no non-escaping allocas, mark all calls
- // in the function as eligible for tail calls (there is no stack memory for
- // them to access).
- if (!FunctionContainsEscapingAllocas)
+  // Finally, if this function contains no escaping allocas and does not
+  // call setjmp, mark all calls in the function as eligible for tail calls
+  // (there is no stack memory for them to access).
+ if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (CallInst *CI = dyn_cast<CallInst>(I)) {
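
Functions that call setjmp (or anything else that may return twice) now also block the blanket tail-call marking: after longjmp transfers control back through the setjmp frame, the function keeps running, so a call that had been turned into a tail call could have clobbered stack it still needs. A minimal illustration of the control flow being guarded against, not code from the pass:

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf Env;

    static void helper() {
      std::longjmp(Env, 1);  // control returns into the caller's frame again
    }

    int main() {
      if (setjmp(Env) == 0) {
        // If this call were marked as a tail call, the caller's frame could
        // be reused before longjmp brings execution back into it.
        helper();
      } else {
        std::puts("resumed after longjmp");
      }
      return 0;
    }
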