summary refs log tree commit diff stats
path: root/lib/Transforms/Scalar/LoopStrengthReduce.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp 647
1 files changed, 322 insertions, 325 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 958348d..73e44d7 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -37,8 +37,8 @@
//
// TODO: Handle multiple loops at a time.
//
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
-// instead of a GlobalValue?
+// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
+// of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
@@ -54,27 +54,27 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reduce"
-#include "llvm/AddressingMode.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -224,16 +224,24 @@ namespace {
/// computing satisfying a use. It may include broken-out immediates and scaled
/// registers.
struct Formula {
- /// AM - This is used to represent complex addressing, as well as other kinds
- /// of interesting uses.
- AddrMode AM;
+ /// Global base address used for complex addressing.
+ GlobalValue *BaseGV;
+
+ /// Base offset for complex addressing.
+ int64_t BaseOffset;
+
+ /// Whether any complex addressing has a base register.
+ bool HasBaseReg;
+
+ /// The scale of any complex addressing.
+ int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty, AM.HasBaseReg should be set to true.
- SmallVector<const SCEV *, 2> BaseRegs;
+ /// non-empty, HasBaseReg should be set to true.
+ SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
- /// when AM.Scale is not zero.
+ /// when Scale is not zero.
const SCEV *ScaledReg;
/// UnfoldedOffset - An additional constant offset which added near the
@@ -241,7 +249,9 @@ struct Formula {
/// live in an add immediate field rather than a register.
int64_t UnfoldedOffset;
- Formula() : ScaledReg(0), UnfoldedOffset(0) {}
+ Formula()
+ : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
+ UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
@@ -327,13 +337,13 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
- AM.HasBaseReg = true;
+ HasBaseReg = true;
}
if (!Bad.empty()) {
const SCEV *Sum = SE.getAddExpr(Bad);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
- AM.HasBaseReg = true;
+ HasBaseReg = true;
}
}
@@ -349,7 +359,7 @@ unsigned Formula::getNumRegs() const {
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
- AM.BaseGV ? AM.BaseGV->getType() :
+ BaseGV ? BaseGV->getType() :
0;
}
@@ -382,29 +392,29 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
void Formula::print(raw_ostream &OS) const {
bool First = true;
- if (AM.BaseGV) {
+ if (BaseGV) {
if (!First) OS << " + "; else First = false;
- WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
}
- if (AM.BaseOffs != 0) {
+ if (BaseOffset != 0) {
if (!First) OS << " + "; else First = false;
- OS << AM.BaseOffs;
+ OS << BaseOffset;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I) {
if (!First) OS << " + "; else First = false;
OS << "reg(" << **I << ')';
}
- if (AM.HasBaseReg && BaseRegs.empty()) {
+ if (HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: HasBaseReg**";
- } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+ } else if (!HasBaseReg && !BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: !HasBaseReg**";
}
- if (AM.Scale != 0) {
+ if (Scale != 0) {
if (!First) OS << " + "; else First = false;
- OS << AM.Scale << "*reg(";
+ OS << Scale << "*reg(";
if (ScaledReg)
OS << *ScaledReg;
else
@@ -885,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
- if (isLoser())
+ if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
@@ -927,8 +937,8 @@ void Cost::RateFormula(const Formula &F,
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
- int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
- if (F.AM.BaseGV)
+ int64_t Offset = (uint64_t)*I + F.BaseOffset;
+ if (F.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
@@ -1078,19 +1088,19 @@ namespace {
/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
- static SmallVector<const SCEV *, 2> getEmptyKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getEmptyKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-1));
return V;
}
- static SmallVector<const SCEV *, 2> getTombstoneKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getTombstoneKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-2));
return V;
}
- static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
unsigned Result = 0;
for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
E = V.end(); I != E; ++I)
@@ -1098,8 +1108,8 @@ struct UniquifierDenseMapInfo {
return Result;
}
- static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
- const SmallVector<const SCEV *, 2> &RHS) {
+ static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
+ const SmallVector<const SCEV *, 4> &RHS) {
return LHS == RHS;
}
};
@@ -1110,7 +1120,7 @@ struct UniquifierDenseMapInfo {
/// the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
- DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+ DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
/// KindType - An enum for a kind of use, indicating what types of
@@ -1169,7 +1179,7 @@ public:
/// HasFormula - Test whether this use as a formula which has the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
@@ -1179,7 +1189,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
@@ -1270,46 +1280,42 @@ void LSRUse::dump() const {
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const AddrMode &AM,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
+ Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
- // If we have low-level target information, ask the target if it can
- // completely fold this address.
- if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+ return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
// Otherwise, just guess that reg+reg addressing is legal.
- return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+ // (Unreachable after the unconditional return above; no guess is made.)
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
- if (AM.BaseGV)
+ if (BaseGV)
return false;
// ICmp only has two operands; don't allow more than two non-trivial parts.
- if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+ if (Scale != 0 && HasBaseReg && BaseOffset != 0)
return false;
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
// putting the scaled register in the other operand of the icmp.
- if (AM.Scale != 0 && AM.Scale != -1)
+ if (Scale != 0 && Scale != -1)
return false;
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
- if (AM.BaseOffs != 0) {
- if (!TLI)
- return false;
+ if (BaseOffset != 0) {
// We have one of:
- // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
- // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+ // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
+ // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
// Offs is the ICmp immediate.
- int64_t Offs = AM.BaseOffs;
- if (AM.Scale == 0)
- Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
- return TLI->isLegalICmpImmediate(Offs);
+ if (Scale == 0)
+ // The cast does the right thing with INT64_MIN.
+ BaseOffset = -(uint64_t)BaseOffset;
+ return TTI.isLegalICmpImmediate(BaseOffset);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1317,92 +1323,87 @@ static bool isLegalUse(const AddrMode &AM,
case LSRUse::Basic:
// Only handle single-register values.
- return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+ return !BaseGV && Scale == 0 && BaseOffset == 0;
case LSRUse::Special:
// Special case Basic to handle -1 scales.
- return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
+ return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(AddrMode AM,
- int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
// Check for overflow.
- if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+ if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
return false;
- AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
- if (isLegalUse(AM, Kind, AccessTy, TLI)) {
- AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
- // Check for overflow.
- if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
- (MaxOffset > 0))
- return false;
- AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
- return isLegalUse(AM, Kind, AccessTy, TLI);
- }
- return false;
+ MinOffset = (uint64_t)BaseOffset + MinOffset;
+ if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
+ (MaxOffset > 0))
+ return false;
+ MaxOffset = (uint64_t)BaseOffset + MaxOffset;
+
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
+ Scale) &&
+ isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+}
+
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg, F.Scale);
}
-static bool isAlwaysFoldable(int64_t BaseOffs,
- GlobalValue *BaseGV,
- bool HasBaseReg,
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg) {
// Fast-path: zero is always foldable.
- if (BaseOffs == 0 && !BaseGV) return true;
+ if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- AddrMode AM;
- AM.BaseOffs = BaseOffs;
- AM.BaseGV = BaseGV;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
// Canonicalize a scale of 1 to a base register if the formula doesn't
// already have a base register.
- if (!AM.HasBaseReg && AM.Scale == 1) {
- AM.Scale = 0;
- AM.HasBaseReg = true;
+ if (!HasBaseReg && Scale == 1) {
+ Scale = 0;
+ HasBaseReg = true;
}
- return isLegalUse(AM, Kind, AccessTy, TLI);
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
}
-static bool isAlwaysFoldable(const SCEV *S,
- int64_t MinOffset, int64_t MaxOffset,
- bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI,
- ScalarEvolution &SE) {
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ Type *AccessTy, const SCEV *S, bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- int64_t BaseOffs = ExtractImmediate(S, SE);
+ int64_t BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
- if (BaseOffs == 0 && !BaseGV) return true;
+ if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- AddrMode AM;
- AM.BaseOffs = BaseOffs;
- AM.BaseGV = BaseGV;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
@@ -1502,7 +1503,7 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
- const TargetLowering *const TLI;
+ const TargetTransformInfo &TTI;
Loop *const L;
bool Changed;
@@ -1638,7 +1639,7 @@ class LSRInstance {
Pass *P);
public:
- LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+ LSRInstance(Loop *L, Pass *P);
bool getChanged() const { return Changed; }
@@ -1688,12 +1689,9 @@ void LSRInstance::OptimizeShadowIV() {
}
if (!DestTy) continue;
- if (TLI) {
- // If target does not support DestTy natively then do not apply
- // this transformation.
- EVT DVT = TLI->getValueType(DestTy);
- if (!TLI->isTypeLegal(DVT)) continue;
- }
+ // If target does not support DestTy natively then do not apply
+ // this transformation.
+ if (!TTI.isTypeLegal(DestTy)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
@@ -1897,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
@@ -2015,18 +2011,17 @@ LSRInstance::OptimizeLoopTermCond() {
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
- // Without TLI, assume that any stride might be valid, and so any
- // use might be shared.
- if (!TLI)
- goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
- AddrMode AM;
- AM.Scale = C->getSExtValue();
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ int64_t Scale = C->getSExtValue();
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
- AM.Scale = -AM.Scale;
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ Scale = -Scale;
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
}
}
@@ -2096,13 +2091,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
@@ -2131,7 +2126,8 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
}
@@ -2199,10 +2195,10 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
// as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
- F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale &&
+ F.BaseGV == OrigF.BaseGV &&
+ F.Scale == OrigF.Scale &&
F.UnfoldedOffset == OrigF.UnfoldedOffset) {
- if (F.AM.BaseOffs == 0)
+ if (F.BaseOffset == 0)
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
@@ -2396,7 +2392,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
- ScalarEvolution &SE, const TargetLowering *TLI) {
+ ScalarEvolution &SE, const TargetTransformInfo &TTI) {
if (StressIVChain)
return true;
@@ -2539,6 +2535,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// Add this IV user to the end of the chain.
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
+ IVChain &Chain = IVChainVec[ChainIdx];
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
// This chain's NearUsers become FarUsers.
@@ -2556,8 +2553,19 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
for (Value::use_iterator UseIter = IVOper->use_begin(),
UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
- if (!OtherUse || OtherUse == UserInst)
+ if (!OtherUse)
continue;
+ // Uses in the chain will no longer be uses if the chain is formed.
+ // Include the head of the chain in this iteration (not Chain.begin()).
+ IVChain::const_iterator IncIter = Chain.Incs.begin();
+ IVChain::const_iterator IncEnd = Chain.Incs.end();
+ for( ; IncIter != IncEnd; ++IncIter) {
+ if (IncIter->UserInst == OtherUse)
+ break;
+ }
+ if (IncIter != IncEnd)
+ continue;
+
if (SE.isSCEVable(OtherUse->getType())
&& !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
&& IU.isIVUserOrOperand(OtherUse)) {
@@ -2654,7 +2662,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
@@ -2681,7 +2689,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
- Value *Operand, const TargetLowering *TLI) {
+ Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
@@ -2690,8 +2698,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
return false;
int64_t IncOffset = IncConst->getValue()->getSExtValue();
- if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false,
- LSRUse::Address, getAccessType(UserInst), TLI))
+ if (!isAlwaysFoldable(TTI, LSRUse::Address,
+ getAccessType(UserInst), /*BaseGV=*/ 0,
+ IncOffset, /*HasBaseReg=*/ false))
return false;
return true;
@@ -2705,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// by LSR.
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = 0;
@@ -2762,7 +2772,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- TLI)) {
+ TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
@@ -2904,7 +2914,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
- F.AM.HasBaseReg = true;
+ F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
@@ -3106,9 +3116,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
- if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
- Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
continue;
// Collect all operands except *J.
@@ -3120,9 +3129,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
- Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@@ -3132,10 +3140,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (TLI && InnerSumSC &&
+ if (InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
F.BaseRegs.erase(F.BaseRegs.begin() + i);
@@ -3144,9 +3152,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
else
@@ -3195,7 +3203,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
- if (Base.AM.BaseGV) return;
+ if (Base.BaseGV) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
@@ -3203,9 +3211,8 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
if (G->isZero() || !GV)
continue;
Formula F = Base;
- F.AM.BaseGV = GV;
- if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3228,9 +3235,9 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
E = Worklist.end(); I != E; ++I) {
Formula F = Base;
- F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
- if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
- LU.Kind, LU.AccessTy, TLI)) {
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
@@ -3248,9 +3255,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (G->isZero() || Imm == 0)
continue;
Formula F = Base;
- F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
- if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3271,7 +3277,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Don't do this if there is more than one offset.
if (LU.MinOffset != LU.MaxOffset) return;
- assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+ assert(!Base.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3279,10 +3285,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
int64_t Factor = *I;
// Check that the multiplication doesn't overflow.
- if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
+ if (Base.BaseOffset == INT64_MIN && Factor == -1)
continue;
- int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if (NewBaseOffs / Factor != Base.AM.BaseOffs)
+ int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
+ if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
// Check that multiplying with the use offset doesn't overflow.
@@ -3294,14 +3300,14 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
continue;
Formula F = Base;
- F.AM.BaseOffs = NewBaseOffs;
+ F.BaseOffset = NewBaseOffset;
// Check that this scale is legal.
- if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
continue;
// Compensate for the use having MinOffset built into it.
- F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
@@ -3342,23 +3348,23 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.AM.Scale != 0) return;
+ if (Base.Scale != 0) return;
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
- Base.AM.Scale = Factor;
- Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+ Base.Scale = Factor;
+ Base.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
- if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ Base)) {
// As a special-case, handle special out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
- isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LSRUse::Special, LU.AccessTy, TLI) &&
+ isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
+ LU.AccessTy, Base) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
@@ -3367,7 +3373,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// For an ICmpZero, negating a solitary base register won't lead to
// new solutions.
if (LU.Kind == LSRUse::ICmpZero &&
- !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+ !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
// For each addrec base reg, apply the scale, if possible.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
@@ -3391,11 +3397,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
- // This requires TargetLowering to tell us which truncates are free.
- if (!TLI) return;
-
// Don't bother truncating symbolic values.
- if (Base.AM.BaseGV) return;
+ if (Base.BaseGV) return;
// Determine the integer type for the base formula.
Type *DstTy = Base.getType();
@@ -3405,7 +3408,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
- if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@@ -3552,16 +3555,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
const Formula &F = LU.Formulae[L];
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
- int64_t Offs = (uint64_t)F.AM.BaseOffs +
- Imm * (uint64_t)F.AM.Scale;
+ int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
// Don't create 50 + reg(-50).
if (F.referencesReg(SE.getSCEV(
- ConstantInt::get(IntTy, -(uint64_t)Offs))))
+ ConstantInt::get(IntTy, -(uint64_t)Offset))))
continue;
Formula NewF = F;
- NewF.AM.BaseOffs = Offs;
- if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ NewF.BaseOffset = Offset;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ NewF))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
@@ -3570,9 +3572,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->isNegative() !=
- (NewF.AM.BaseOffs < 0) &&
- (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
- .ule(abs64(NewF.AM.BaseOffs)))
+ (NewF.BaseOffset < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
+ .ule(abs64(NewF.BaseOffset)))
continue;
// OK, looks good.
@@ -3584,11 +3586,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
- NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
- if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
- if (!TLI ||
- !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, NewF)) {
+ if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@@ -3602,11 +3603,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
- if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
- abs64(NewF.AM.BaseOffs)) &&
+ if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
+ abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
- NewF.AM.BaseOffs).countTrailingZeros() >=
- CountTrailingZeros_64(NewF.AM.BaseOffs))
+ NewF.BaseOffset).countTrailingZeros() >=
+ CountTrailingZeros_64(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
@@ -3667,7 +3668,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
- typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
@@ -3702,7 +3703,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
dbgs() << "\n");
}
else {
- SmallVector<const SCEV *, 2> Key;
+ SmallVector<const SCEV *, 4> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
@@ -3804,7 +3805,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
- NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+ NewF.BaseOffset += C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -3817,9 +3818,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
- if (!F.AM.BaseGV) {
+ if (!F.BaseGV) {
Formula NewF = F;
- NewF.AM.BaseGV = GV;
+ NewF.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -3848,84 +3849,83 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
/// for expressions like A, A+1, A+2, etc., allocate a single register for
/// them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
- if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
- DEBUG(dbgs() << "The search space is too complex.\n");
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
- DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
- "separated by a constant offset will use the same "
- "registers.\n");
+ DEBUG(dbgs() << "The search space is too complex.\n"
+ "Narrowing the search space by assuming that uses separated "
+ "by a constant offset will use the same registers.\n");
- // This is especially useful for unrolled loops.
+ // This is especially useful for unrolled loops.
- for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
- LSRUse &LU = Uses[LUIdx];
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
- if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
- if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
- if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
- /*HasBaseReg=*/false,
- LU.Kind, LU.AccessTy)) {
- DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
- dbgs() << '\n');
-
- LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
-
- // Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
- if (Fixup.LUIdx == LUIdx) {
- Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.AM.BaseOffs;
- // Add the new offset to LUThatHas' offset list.
- if (LUThatHas->Offsets.back() != Fixup.Offset) {
- LUThatHas->Offsets.push_back(Fixup.Offset);
- if (Fixup.Offset > LUThatHas->MaxOffset)
- LUThatHas->MaxOffset = Fixup.Offset;
- if (Fixup.Offset < LUThatHas->MinOffset)
- LUThatHas->MinOffset = Fixup.Offset;
- }
- DEBUG(dbgs() << "New fixup has offset "
- << Fixup.Offset << '\n');
- }
- if (Fixup.LUIdx == NumUses-1)
- Fixup.LUIdx = LUIdx;
- }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.BaseOffset == 0 || F.Scale != 0)
+ continue;
- // Delete formulae from the new use which are no longer legal.
- bool Any = false;
- for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
- Formula &F = LUThatHas->Formulae[i];
- if (!isLegalUse(F.AM,
- LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
- DEBUG(dbgs() << " Deleting "; F.print(dbgs());
- dbgs() << '\n');
- LUThatHas->DeleteFormula(F);
- --i;
- --e;
- Any = true;
- }
- }
- if (Any)
- LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+ LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
+ if (!LUThatHas)
+ continue;
- // Delete the old use.
- DeleteUse(LU, LUIdx);
- --LUIdx;
- --NumUses;
- break;
- }
+ if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
+ LU.Kind, LU.AccessTy))
+ continue;
+
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.BaseOffset;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
}
+ DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
}
- }
- DEBUG(dbgs() << "After pre-selection:\n";
- print_uses(dbgs()));
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, F)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Delete the old use.
+ DeleteUse(LU, LUIdx);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
}
+
+ DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
@@ -4308,7 +4308,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
- if (F.AM.Scale != 0) {
+ if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
// If we're expanding for a post-inc user, make the post-inc adjustment.
@@ -4321,7 +4321,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
- assert(F.AM.Scale == -1 &&
+ assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
} else {
@@ -4336,20 +4336,20 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.AM.Scale));
+ SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
// Expand the GV portion.
- if (F.AM.BaseGV) {
+ if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+ Ops.push_back(SE.getUnknown(F.BaseGV));
}
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
@@ -4361,7 +4361,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
// Expand the immediate portion.
- int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+ int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
// The other interesting way of "folding" with an ICmpZero is to use a
@@ -4402,9 +4402,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (LU.Kind == LSRUse::ICmpZero) {
ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
DeadInsts.push_back(CI->getOperand(1));
- assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+ assert(!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!");
- if (F.AM.Scale == -1) {
+ if (F.Scale == -1) {
if (ICmpScaledV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
@@ -4414,7 +4414,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.AM.Scale == 0 &&
+ assert(F.Scale == 0 &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -4589,13 +4589,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
- : IU(P->getAnalysis<IVUsers>()),
- SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTree>()),
- LI(P->getAnalysis<LoopInfo>()),
- TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
-
+LSRInstance::LSRInstance(Loop *L, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+ TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
+ IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -4678,14 +4676,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
#ifndef NDEBUG
// Formulae should be legal.
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- const LSRUse &LU = *I;
- for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
- JE = LU.Formulae.end(); J != JE; ++J)
- assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI) &&
- "Illegal formula generated!");
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
+ I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end();
+ J != JE; ++J)
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ *J) && "Illegal formula generated!");
};
#endif
@@ -4757,13 +4755,9 @@ void LSRInstance::dump() const {
namespace {
class LoopStrengthReduce : public LoopPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetLowering *const TLI;
-
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+ LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -4775,6 +4769,7 @@ private:
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
@@ -4784,14 +4779,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
- return new LoopStrengthReduce(TLI);
+Pass *llvm::createLoopStrengthReducePass() {
+ return new LoopStrengthReduce();
}
-LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(ID), TLI(tli) {
- initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
- }
+LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
@@ -4810,24 +4804,27 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
+ AU.addRequired<TargetTransformInfo>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(TLI, L, this).getChanged();
+ Changed |= LSRInstance(L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
- if (EnablePhiElim) {
+ if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
- unsigned numFolded = Rewriter.
- replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ unsigned numFolded =
+ Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
+ DeadInsts,
+ &getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
OpenPOWER on IntegriCloud