diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 647 |
1 files changed, 322 insertions, 325 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 958348d..73e44d7 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -37,8 +37,8 @@ // // TODO: Handle multiple loops at a time. // -// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr -// instead of a GlobalValue? +// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead +// of a GlobalValue? // // TODO: When truncation is free, truncate ICmp users' operands to make it a // smaller encoding (on x86 at least). @@ -54,27 +54,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" -#include "llvm/AddressingMode.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Analysis/IVUsers.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/Support/Debug.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; @@ -224,16 +224,24 @@ namespace { /// computing satisfying a use. It may include broken-out immediates and scaled /// registers. struct Formula { - /// AM - This is used to represent complex addressing, as well as other kinds - /// of interesting uses. - AddrMode AM; + /// Global base address used for complex addressing. + GlobalValue *BaseGV; + + /// Base offset for complex addressing. + int64_t BaseOffset; + + /// Whether any complex addressing has a base register. + bool HasBaseReg; + + /// The scale of any complex addressing. + int64_t Scale; /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty, AM.HasBaseReg should be set to true. - SmallVector<const SCEV *, 2> BaseRegs; + /// non-empty, + SmallVector<const SCEV *, 4> BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null - /// when AM.Scale is not zero. + /// when Scale is not zero. const SCEV *ScaledReg; /// UnfoldedOffset - An additional constant offset which added near the @@ -241,7 +249,9 @@ struct Formula { /// live in an add immediate field rather than a register. int64_t UnfoldedOffset; - Formula() : ScaledReg(0), UnfoldedOffset(0) {} + Formula() + : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0), + UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); @@ -327,13 +337,13 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { const SCEV *Sum = SE.getAddExpr(Good); if (!Sum->isZero()) BaseRegs.push_back(Sum); - AM.HasBaseReg = true; + HasBaseReg = true; } if (!Bad.empty()) { const SCEV *Sum = SE.getAddExpr(Bad); if (!Sum->isZero()) BaseRegs.push_back(Sum); - AM.HasBaseReg = true; + HasBaseReg = true; } } @@ -349,7 +359,7 @@ unsigned Formula::getNumRegs() const { Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : - AM.BaseGV ? AM.BaseGV->getType() : + BaseGV ? BaseGV->getType() : 0; } @@ -382,29 +392,29 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, void Formula::print(raw_ostream &OS) const { bool First = true; - if (AM.BaseGV) { + if (BaseGV) { if (!First) OS << " + "; else First = false; - WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); + WriteAsOperand(OS, BaseGV, /*PrintType=*/false); } - if (AM.BaseOffs != 0) { + if (BaseOffset != 0) { if (!First) OS << " + "; else First = false; - OS << AM.BaseOffs; + OS << BaseOffset; } for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), E = BaseRegs.end(); I != E; ++I) { if (!First) OS << " + "; else First = false; OS << "reg(" << **I << ')'; } - if (AM.HasBaseReg && BaseRegs.empty()) { + if (HasBaseReg && BaseRegs.empty()) { if (!First) OS << " + "; else First = false; OS << "**error: HasBaseReg**"; - } else if (!AM.HasBaseReg && !BaseRegs.empty()) { + } else if (!HasBaseReg && !BaseRegs.empty()) { if (!First) OS << " + "; else First = false; OS << "**error: !HasBaseReg**"; } - if (AM.Scale != 0) { + if (Scale != 0) { if (!First) OS << " + "; else First = false; - OS << AM.Scale << "*reg("; + OS << Scale << "*reg("; if (ScaledReg) OS << *ScaledReg; else @@ -885,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg, } if (Regs.insert(Reg)) { RateRegister(Reg, Regs, L, SE, DT); - if (isLoser()) + if (LoserRegs && isLoser()) LoserRegs->insert(Reg); } } @@ -927,8 +937,8 @@ void Cost::RateFormula(const Formula &F, // Tally up the non-zero immediates. for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), E = Offsets.end(); I != E; ++I) { - int64_t Offset = (uint64_t)*I + F.AM.BaseOffs; - if (F.AM.BaseGV) + int64_t Offset = (uint64_t)*I + F.BaseOffset; + if (F.BaseGV) ImmCost += 64; // Handle symbolic values conservatively. // TODO: This should probably be the pointer size. else if (Offset != 0) @@ -1078,19 +1088,19 @@ namespace { /// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding /// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. struct UniquifierDenseMapInfo { - static SmallVector<const SCEV *, 2> getEmptyKey() { - SmallVector<const SCEV *, 2> V; + static SmallVector<const SCEV *, 4> getEmptyKey() { + SmallVector<const SCEV *, 4> V; V.push_back(reinterpret_cast<const SCEV *>(-1)); return V; } - static SmallVector<const SCEV *, 2> getTombstoneKey() { - SmallVector<const SCEV *, 2> V; + static SmallVector<const SCEV *, 4> getTombstoneKey() { + SmallVector<const SCEV *, 4> V; V.push_back(reinterpret_cast<const SCEV *>(-2)); return V; } - static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) { + static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) { unsigned Result = 0; for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(), E = V.end(); I != E; ++I) @@ -1098,8 +1108,8 @@ struct UniquifierDenseMapInfo { return Result; } - static bool isEqual(const SmallVector<const SCEV *, 2> &LHS, - const SmallVector<const SCEV *, 2> &RHS) { + static bool isEqual(const SmallVector<const SCEV *, 4> &LHS, + const SmallVector<const SCEV *, 4> &RHS) { return LHS == RHS; } }; @@ -1110,7 +1120,7 @@ struct UniquifierDenseMapInfo { /// the user itself, and information about how the use may be satisfied. /// TODO: Represent multiple users of the same expression in common? class LSRUse { - DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier; + DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier; public: /// KindType - An enum for a kind of use, indicating what types of @@ -1169,7 +1179,7 @@ public: /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { - SmallVector<const SCEV *, 2> Key = F.BaseRegs; + SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -1179,7 +1189,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { - SmallVector<const SCEV *, 2> Key = F.BaseRegs; + SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -1270,46 +1280,42 @@ void LSRUse::dump() const { /// isLegalUse - Test whether the use described by AM is "legal", meaning it can /// be completely folded into the user instruction at isel time. This includes /// address-mode folding and special icmp tricks. -static bool isLegalUse(const AddrMode &AM, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { +static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, + Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: - // If we have low-level target information, ask the target if it can - // completely fold this address. - if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); + return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); // Otherwise, just guess that reg+reg addressing is legal. - return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; + //return ; case LSRUse::ICmpZero: // There's not even a target hook for querying whether it would be legal to // fold a GV into an ICmp. - if (AM.BaseGV) + if (BaseGV) return false; // ICmp only has two operands; don't allow more than two non-trivial parts. - if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0) + if (Scale != 0 && HasBaseReg && BaseOffset != 0) return false; // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by // putting the scaled register in the other operand of the icmp. - if (AM.Scale != 0 && AM.Scale != -1) + if (Scale != 0 && Scale != -1) return false; // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. - if (AM.BaseOffs != 0) { - if (!TLI) - return false; + if (BaseOffset != 0) { // We have one of: - // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset - // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset + // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset + // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset // Offs is the ICmp immediate. - int64_t Offs = AM.BaseOffs; - if (AM.Scale == 0) - Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN. - return TLI->isLegalICmpImmediate(Offs); + if (Scale == 0) + // The cast does the right thing with INT64_MIN. + BaseOffset = -(uint64_t)BaseOffset; + return TTI.isLegalICmpImmediate(BaseOffset); } // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg @@ -1317,92 +1323,87 @@ static bool isLegalUse(const AddrMode &AM, case LSRUse::Basic: // Only handle single-register values. - return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; + return !BaseGV && Scale == 0 && BaseOffset == 0; case LSRUse::Special: // Special case Basic to handle -1 scales. - return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0; + return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0; } llvm_unreachable("Invalid LSRUse Kind!"); } -static bool isLegalUse(AddrMode AM, - int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) { // Check for overflow. - if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != + if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != (MinOffset > 0)) return false; - AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; - if (isLegalUse(AM, Kind, AccessTy, TLI)) { - AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; - // Check for overflow. - if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != - (MaxOffset > 0)) - return false; - AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; - return isLegalUse(AM, Kind, AccessTy, TLI); - } - return false; + MinOffset = (uint64_t)BaseOffset + MinOffset; + if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) != + (MaxOffset > 0)) + return false; + MaxOffset = (uint64_t)BaseOffset + MaxOffset; + + return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg, + Scale) && + isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale); +} + +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + const Formula &F) { + return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, + F.BaseOffset, F.HasBaseReg, F.Scale); } -static bool isAlwaysFoldable(int64_t BaseOffs, - GlobalValue *BaseGV, - bool HasBaseReg, +static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg) { // Fast-path: zero is always foldable. - if (BaseOffs == 0 && !BaseGV) return true; + if (BaseOffset == 0 && !BaseGV) return true; // Conservatively, create an address with an immediate and a // base and a scale. - AddrMode AM; - AM.BaseOffs = BaseOffs; - AM.BaseGV = BaseGV; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; // Canonicalize a scale of 1 to a base register if the formula doesn't // already have a base register. - if (!AM.HasBaseReg && AM.Scale == 1) { - AM.Scale = 0; - AM.HasBaseReg = true; + if (!HasBaseReg && Scale == 1) { + Scale = 0; + HasBaseReg = true; } - return isLegalUse(AM, Kind, AccessTy, TLI); + return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); } -static bool isAlwaysFoldable(const SCEV *S, - int64_t MinOffset, int64_t MaxOffset, - bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI, - ScalarEvolution &SE) { +static bool isAlwaysFoldable(const TargetTransformInfo &TTI, + ScalarEvolution &SE, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, + Type *AccessTy, const SCEV *S, bool HasBaseReg) { // Fast-path: zero is always foldable. if (S->isZero()) return true; // Conservatively, create an address with an immediate and a // base and a scale. - int64_t BaseOffs = ExtractImmediate(S, SE); + int64_t BaseOffset = ExtractImmediate(S, SE); GlobalValue *BaseGV = ExtractSymbol(S, SE); // If there's anything else involved, it's not foldable. if (!S->isZero()) return false; // Fast-path: zero is always foldable. - if (BaseOffs == 0 && !BaseGV) return true; + if (BaseOffset == 0 && !BaseGV) return true; // Conservatively, create an address with an immediate and a // base and a scale. - AddrMode AM; - AM.BaseOffs = BaseOffs; - AM.BaseGV = BaseGV; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; - return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); + return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale); } namespace { @@ -1502,7 +1503,7 @@ class LSRInstance { ScalarEvolution &SE; DominatorTree &DT; LoopInfo &LI; - const TargetLowering *const TLI; + const TargetTransformInfo &TTI; Loop *const L; bool Changed; @@ -1638,7 +1639,7 @@ class LSRInstance { Pass *P); public: - LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); + LSRInstance(Loop *L, Pass *P); bool getChanged() const { return Changed; } @@ -1688,12 +1689,9 @@ void LSRInstance::OptimizeShadowIV() { } if (!DestTy) continue; - if (TLI) { - // If target does not support DestTy natively then do not apply - // this transformation. - EVT DVT = TLI->getValueType(DestTy); - if (!TLI->isTypeLegal(DVT)) continue; - } + // If target does not support DestTy natively then do not apply + // this transformation. + if (!TTI.isTypeLegal(DestTy)) continue; PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); if (!PH) continue; @@ -1897,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { if (ICmpInst::isTrueWhenEqual(Pred)) { // Look for n+1, and grab n. if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) - if (isa<ConstantInt>(BO->getOperand(1)) && - cast<ConstantInt>(BO->getOperand(1))->isOne() && - SE.getSCEV(BO->getOperand(0)) == MaxRHS) - NewRHS = BO->getOperand(0); + if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) - if (isa<ConstantInt>(BO->getOperand(1)) && - cast<ConstantInt>(BO->getOperand(1))->isOne() && - SE.getSCEV(BO->getOperand(0)) == MaxRHS) - NewRHS = BO->getOperand(0); + if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); if (!NewRHS) return Cond; } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) @@ -2015,18 +2011,17 @@ LSRInstance::OptimizeLoopTermCond() { if (C->getValue().getMinSignedBits() >= 64 || C->getValue().isMinSignedValue()) goto decline_post_inc; - // Without TLI, assume that any stride might be valid, and so any - // use might be shared. - if (!TLI) - goto decline_post_inc; // Check for possible scaled-address reuse. Type *AccessTy = getAccessType(UI->getUser()); - AddrMode AM; - AM.Scale = C->getSExtValue(); - if (TLI->isLegalAddressingMode(AM, AccessTy)) + int64_t Scale = C->getSExtValue(); + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + /*BaseOffset=*/ 0, + /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; - AM.Scale = -AM.Scale; - if (TLI->isLegalAddressingMode(AM, AccessTy)) + Scale = -Scale; + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + /*BaseOffset=*/ 0, + /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; } } @@ -2096,13 +2091,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, return false; // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg, - Kind, AccessTy, TLI)) + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + LU.MaxOffset - NewOffset, HasBaseReg)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg, - Kind, AccessTy, TLI)) + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + NewOffset - LU.MinOffset, HasBaseReg)) return false; NewMaxOffset = NewOffset; } @@ -2131,7 +2126,8 @@ LSRInstance::getUse(const SCEV *&Expr, int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) { + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + Offset, /*HasBaseReg=*/ true)) { Expr = Copy; Offset = 0; } @@ -2199,10 +2195,10 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, // as OrigF. if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && - F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale && + F.BaseGV == OrigF.BaseGV && + F.Scale == OrigF.Scale && F.UnfoldedOffset == OrigF.UnfoldedOffset) { - if (F.AM.BaseOffs == 0) + if (F.BaseOffset == 0) return &LU; // This is the formula where all the registers and symbols matched; // there aren't going to be any others. Since we declined it, we @@ -2396,7 +2392,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr, /// TODO: Consider IVInc free if it's already used in another chains. static bool isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users, - ScalarEvolution &SE, const TargetLowering *TLI) { + ScalarEvolution &SE, const TargetTransformInfo &TTI) { if (StressIVChain) return true; @@ -2539,6 +2535,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, // Add this IV user to the end of the chain. IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); } + IVChain &Chain = IVChainVec[ChainIdx]; SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers; // This chain's NearUsers become FarUsers. @@ -2556,8 +2553,19 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, for (Value::use_iterator UseIter = IVOper->use_begin(), UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { Instruction *OtherUse = dyn_cast<Instruction>(*UseIter); - if (!OtherUse || OtherUse == UserInst) + if (!OtherUse) continue; + // Uses in the chain will no longer be uses if the chain is formed. + // Include the head of the chain in this iteration (not Chain.begin()). + IVChain::const_iterator IncIter = Chain.Incs.begin(); + IVChain::const_iterator IncEnd = Chain.Incs.end(); + for( ; IncIter != IncEnd; ++IncIter) { + if (IncIter->UserInst == OtherUse) + break; + } + if (IncIter != IncEnd) + continue; + if (SE.isSCEVable(OtherUse->getType()) && !isa<SCEVUnknown>(SE.getSCEV(OtherUse)) && IU.isIVUserOrOperand(OtherUse)) { @@ -2654,7 +2662,7 @@ void LSRInstance::CollectChains() { for (unsigned UsersIdx = 0, NChains = IVChainVec.size(); UsersIdx < NChains; ++UsersIdx) { if (!isProfitableChain(IVChainVec[UsersIdx], - ChainUsersVec[UsersIdx].FarUsers, SE, TLI)) + ChainUsersVec[UsersIdx].FarUsers, SE, TTI)) continue; // Preserve the chain at UsesIdx. if (ChainIdx != UsersIdx) @@ -2681,7 +2689,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) { /// Return true if the IVInc can be folded into an addressing mode. static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, - Value *Operand, const TargetLowering *TLI) { + Value *Operand, const TargetTransformInfo &TTI) { const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr); if (!IncConst || !isAddressUse(UserInst, Operand)) return false; @@ -2690,8 +2698,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, return false; int64_t IncOffset = IncConst->getValue()->getSExtValue(); - if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false, - LSRUse::Address, getAccessType(UserInst), TLI)) + if (!isAlwaysFoldable(TTI, LSRUse::Address, + getAccessType(UserInst), /*BaseGV=*/ 0, + IncOffset, /*HaseBaseReg=*/ false)) return false; return true; @@ -2705,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // by LSR. const IVInc &Head = Chain.Incs[0]; User::op_iterator IVOpEnd = Head.UserInst->op_end(); + // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); Value *IVSrc = 0; @@ -2762,7 +2772,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // If an IV increment can't be folded, use it as the next IV value. if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand, - TLI)) { + TTI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); IVSrc = IVOper; LeftOverExpr = 0; @@ -2904,7 +2914,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; F.BaseRegs.push_back(S); - F.AM.HasBaseReg = true; + F.HasBaseReg = true; bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; } @@ -3106,9 +3116,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Don't pull a constant into a register if the constant could be folded // into an immediate field. - if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, - Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, TLI, SE)) + if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, *J, Base.getNumRegs() > 1)) continue; // Collect all operands except *J. @@ -3120,9 +3129,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. if (InnerAddOps.size() == 1 && - isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset, - Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, TLI, SE)) + isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) continue; const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); @@ -3132,10 +3140,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add the remaining pieces of the add back into the new formula. const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); - if (TLI && InnerSumSC && + if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue())) { + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); F.BaseRegs.erase(F.BaseRegs.begin() + i); @@ -3144,9 +3152,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add J as its own register, or an unfolded immediate. const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); - if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue())) + if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); else @@ -3195,7 +3203,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. - if (Base.AM.BaseGV) return; + if (Base.BaseGV) return; for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *G = Base.BaseRegs[i]; @@ -3203,9 +3211,8 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, if (G->isZero() || !GV) continue; Formula F = Base; - F.AM.BaseGV = GV; - if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + F.BaseGV = GV; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3228,9 +3235,9 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), E = Worklist.end(); I != E; ++I) { Formula F = Base; - F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; - if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, - LU.Kind, LU.AccessTy, TLI)) { + F.BaseOffset = (uint64_t)Base.BaseOffset - *I; + if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, + LU.AccessTy, F)) { // Add the offset to the base register. const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. @@ -3248,9 +3255,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (G->isZero() || Imm == 0) continue; Formula F = Base; - F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm; - if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + F.BaseOffset = (uint64_t)F.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3271,7 +3277,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Don't do this if there is more than one offset. if (LU.MinOffset != LU.MaxOffset) return; - assert(!Base.AM.BaseGV && "ICmpZero use is not legal!"); + assert(!Base.BaseGV && "ICmpZero use is not legal!"); // Check each interesting stride. for (SmallSetVector<int64_t, 8>::const_iterator @@ -3279,10 +3285,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, int64_t Factor = *I; // Check that the multiplication doesn't overflow. - if (Base.AM.BaseOffs == INT64_MIN && Factor == -1) + if (Base.BaseOffset == INT64_MIN && Factor == -1) continue; - int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; - if (NewBaseOffs / Factor != Base.AM.BaseOffs) + int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor; + if (NewBaseOffset / Factor != Base.BaseOffset) continue; // Check that multiplying with the use offset doesn't overflow. @@ -3294,14 +3300,14 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, continue; Formula F = Base; - F.AM.BaseOffs = NewBaseOffs; + F.BaseOffset = NewBaseOffset; // Check that this scale is legal. - if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) + if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F)) continue; // Compensate for the use having MinOffset built into it. - F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; + F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset; const SCEV *FactorS = SE.getConstant(IntTy, Factor); @@ -3342,23 +3348,23 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. - if (Base.AM.Scale != 0) return; + if (Base.Scale != 0) return; // Check each interesting stride. for (SmallSetVector<int64_t, 8>::const_iterator I = Factors.begin(), E = Factors.end(); I != E; ++I) { int64_t Factor = *I; - Base.AM.Scale = Factor; - Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; + Base.Scale = Factor; + Base.HasBaseReg = Base.BaseRegs.size() > 1; // Check whether this scale is going to be legal. - if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) { + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + Base)) { // As a special-case, handle special out-of-loop Basic users specially. // TODO: Reconsider this special case. if (LU.Kind == LSRUse::Basic && - isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LSRUse::Special, LU.AccessTy, TLI) && + isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special, + LU.AccessTy, Base) && LU.AllFixupsOutsideLoop) LU.Kind = LSRUse::Special; else @@ -3367,7 +3373,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // For an ICmpZero, negating a solitary base register won't lead to // new solutions. if (LU.Kind == LSRUse::ICmpZero && - !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV) + !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV) continue; // For each addrec base reg, apply the scale, if possible. for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) @@ -3391,11 +3397,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { /// GenerateTruncates - Generate reuse formulae from different IV types. void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { - // This requires TargetLowering to tell us which truncates are free. - if (!TLI) return; - // Don't bother truncating symbolic values. - if (Base.AM.BaseGV) return; + if (Base.BaseGV) return; // Determine the integer type for the base formula. Type *DstTy = Base.getType(); @@ -3405,7 +3408,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { Type *SrcTy = *I; - if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { + if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) { Formula F = Base; if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I); @@ -3552,16 +3555,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { const Formula &F = LU.Formulae[L]; // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { - int64_t Offs = (uint64_t)F.AM.BaseOffs + - Imm * (uint64_t)F.AM.Scale; + int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; // Don't create 50 + reg(-50). if (F.referencesReg(SE.getSCEV( - ConstantInt::get(IntTy, -(uint64_t)Offs)))) + ConstantInt::get(IntTy, -(uint64_t)Offset)))) continue; Formula NewF = F; - NewF.AM.BaseOffs = Offs; - if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + NewF.BaseOffset = Offset; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + NewF)) continue; NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); @@ -3570,9 +3572,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // immediate itself, then the formula isn't worthwhile. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) if (C->getValue()->isNegative() != - (NewF.AM.BaseOffs < 0) && - (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) - .ule(abs64(NewF.AM.BaseOffs))) + (NewF.BaseOffset < 0) && + (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale)) + .ule(abs64(NewF.BaseOffset))) continue; // OK, looks good. @@ -3584,11 +3586,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (BaseReg != OrigReg) continue; Formula NewF = F; - NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; - if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) { - if (!TLI || - !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) + NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, NewF)) { + if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) continue; NewF = F; NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; @@ -3602,11 +3603,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end(); J != JE; ++J) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) - if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt( - abs64(NewF.AM.BaseOffs)) && + if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt( + abs64(NewF.BaseOffset)) && (C->getValue()->getValue() + - NewF.AM.BaseOffs).countTrailingZeros() >= - CountTrailingZeros_64(NewF.AM.BaseOffs)) + NewF.BaseOffset).countTrailingZeros() >= + CountTrailingZeros_64(NewF.BaseOffset)) goto skip_formula; // Ok, looks good. @@ -3667,7 +3668,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // Collect the best formula for each unique set of shared registers. This // is reset for each use. - typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo> + typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo> BestFormulaeTy; BestFormulaeTy BestFormulae; @@ -3702,7 +3703,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { dbgs() << "\n"); } else { - SmallVector<const SCEV *, 2> Key; + SmallVector<const SCEV *, 4> Key; for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), JE = F.BaseRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; @@ -3804,7 +3805,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) { Formula NewF = F; - NewF.AM.BaseOffs += C->getValue()->getSExtValue(); + NewF.BaseOffset += C->getValue()->getSExtValue(); NewF.BaseRegs.erase(NewF.BaseRegs.begin() + (I - F.BaseRegs.begin())); if (LU.HasFormulaWithSameRegs(NewF)) { @@ -3817,9 +3818,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { } } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) { if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) - if (!F.AM.BaseGV) { + if (!F.BaseGV) { Formula NewF = F; - NewF.AM.BaseGV = GV; + NewF.BaseGV = GV; NewF.BaseRegs.erase(NewF.BaseRegs.begin() + (I - F.BaseRegs.begin())); if (LU.HasFormulaWithSameRegs(NewF)) { @@ -3848,84 +3849,83 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { /// for expressions like A, A+1, A+2, etc., allocate a single register for /// them. void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { - if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { - DEBUG(dbgs() << "The search space is too complex.\n"); + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; - DEBUG(dbgs() << "Narrowing the search space by assuming that uses " - "separated by a constant offset will use the same " - "registers.\n"); + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by assuming that uses separated " + "by a constant offset will use the same registers.\n"); - // This is especially useful for unrolled loops. + // This is especially useful for unrolled loops. - for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { - LSRUse &LU = Uses[LUIdx]; - for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), - E = LU.Formulae.end(); I != E; ++I) { - const Formula &F = *I; - if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) { - if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { - if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs, - /*HasBaseReg=*/false, - LU.Kind, LU.AccessTy)) { - DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); - dbgs() << '\n'); - - LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; - - // Update the relocs to reference the new use. - for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.AM.BaseOffs; - // Add the new offset to LUThatHas' offset list. - if (LUThatHas->Offsets.back() != Fixup.Offset) { - LUThatHas->Offsets.push_back(Fixup.Offset); - if (Fixup.Offset > LUThatHas->MaxOffset) - LUThatHas->MaxOffset = Fixup.Offset; - if (Fixup.Offset < LUThatHas->MinOffset) - LUThatHas->MinOffset = Fixup.Offset; - } - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.BaseOffset == 0 || F.Scale != 0) + continue; - // Delete formulae from the new use which are no longer legal. - bool Any = false; - for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { - Formula &F = LUThatHas->Formulae[i]; - if (!isLegalUse(F.AM, - LUThatHas->MinOffset, LUThatHas->MaxOffset, - LUThatHas->Kind, LUThatHas->AccessTy, TLI)) { - DEBUG(dbgs() << " Deleting "; F.print(dbgs()); - dbgs() << '\n'); - LUThatHas->DeleteFormula(F); - --i; - --e; - Any = true; - } - } - if (Any) - LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); + if (!LUThatHas) + continue; - // Delete the old use. - DeleteUse(LU, LUIdx); - --LUIdx; - --NumUses; - break; - } + if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, + LU.Kind, LU.AccessTy)) + continue; + + DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); + + LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + + // Update the relocs to reference the new use. + for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.BaseOffset; + // Add the new offset to LUThatHas' offset list. + if (LUThatHas->Offsets.back() != Fixup.Offset) { + LUThatHas->Offsets.push_back(Fixup.Offset); + if (Fixup.Offset > LUThatHas->MaxOffset) + LUThatHas->MaxOffset = Fixup.Offset; + if (Fixup.Offset < LUThatHas->MinOffset) + LUThatHas->MinOffset = Fixup.Offset; } + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; } - } - DEBUG(dbgs() << "After pre-selection:\n"; - print_uses(dbgs())); + // Delete formulae from the new use which are no longer legal. + bool Any = false; + for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { + Formula &F = LUThatHas->Formulae[i]; + if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, + LUThatHas->Kind, LUThatHas->AccessTy, F)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LUThatHas->DeleteFormula(F); + --i; + --e; + Any = true; + } + } + + if (Any) + LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + + // Delete the old use. + DeleteUse(LU, LUIdx); + --LUIdx; + --NumUses; + break; + } } + + DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } /// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call @@ -4308,7 +4308,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Expand the ScaledReg portion. Value *ICmpScaledV = 0; - if (F.AM.Scale != 0) { + if (F.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; // If we're expanding for a post-inc user, make the post-inc adjustment. @@ -4321,7 +4321,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. - assert(F.AM.Scale == -1 && + assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); } else { @@ -4336,20 +4336,20 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); ScaledS = SE.getMulExpr(ScaledS, - SE.getConstant(ScaledS->getType(), F.AM.Scale)); + SE.getConstant(ScaledS->getType(), F.Scale)); Ops.push_back(ScaledS); } } // Expand the GV portion. - if (F.AM.BaseGV) { + if (F.BaseGV) { // Flush the operand list to suppress SCEVExpander hoisting. if (!Ops.empty()) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - Ops.push_back(SE.getUnknown(F.AM.BaseGV)); + Ops.push_back(SE.getUnknown(F.BaseGV)); } // Flush the operand list to suppress SCEVExpander hoisting of both folded and @@ -4361,7 +4361,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } // Expand the immediate portion. - int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset; + int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset; if (Offset != 0) { if (LU.Kind == LSRUse::ICmpZero) { // The other interesting way of "folding" with an ICmpZero is to use a @@ -4402,9 +4402,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (LU.Kind == LSRUse::ICmpZero) { ICmpInst *CI = cast<ICmpInst>(LF.UserInst); DeadInsts.push_back(CI->getOperand(1)); - assert(!F.AM.BaseGV && "ICmp does not support folding a global value and " + assert(!F.BaseGV && "ICmp does not support folding a global value and " "a scale at the same time!"); - if (F.AM.Scale == -1) { + if (F.Scale == -1) { if (ICmpScaledV->getType() != OpTy) { Instruction *Cast = CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, @@ -4414,7 +4414,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } CI->setOperand(1, ICmpScaledV); } else { - assert(F.AM.Scale == 0 && + assert(F.Scale == 0 && "ICmp does not support folding a global value and " "a scale at the same time!"); Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), @@ -4589,13 +4589,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Changed |= DeleteTriviallyDeadInstructions(DeadInsts); } -LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) - : IU(P->getAnalysis<IVUsers>()), - SE(P->getAnalysis<ScalarEvolution>()), - DT(P->getAnalysis<DominatorTree>()), - LI(P->getAnalysis<LoopInfo>()), - TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { - +LSRInstance::LSRInstance(Loop *L, Pass *P) + : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()), + DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()), + TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false), + IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -4678,14 +4676,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) #ifndef NDEBUG // Formulae should be legal. - for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), - E = Uses.end(); I != E; ++I) { - const LSRUse &LU = *I; - for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), - JE = LU.Formulae.end(); J != JE; ++J) - assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI) && - "Illegal formula generated!"); + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end(); + I != E; ++I) { + const LSRUse &LU = *I; + for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), + JE = LU.Formulae.end(); + J != JE; ++J) + assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + *J) && "Illegal formula generated!"); }; #endif @@ -4757,13 +4755,9 @@ void LSRInstance::dump() const { namespace { class LoopStrengthReduce : public LoopPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. - const TargetLowering *const TLI; - public: static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = 0); + LoopStrengthReduce(); private: bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -4775,6 +4769,7 @@ private: char LoopStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(IVUsers) @@ -4784,14 +4779,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +Pass *llvm::createLoopStrengthReducePass() { + return new LoopStrengthReduce(); } -LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(ID), TLI(tli) { - initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); - } +LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) { + initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); +} void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update @@ -4810,24 +4804,27 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); AU.addRequired<IVUsers>(); AU.addPreserved<IVUsers>(); + AU.addRequired<TargetTransformInfo>(); } bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { bool Changed = false; // Run the main LSR transformation. - Changed |= LSRInstance(TLI, L, this).getChanged(); + Changed |= LSRInstance(L, this).getChanged(); // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); - if (EnablePhiElim) { + if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector<WeakVH, 16> DeadInsts; SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif - unsigned numFolded = Rewriter. - replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI); + unsigned numFolded = + Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), + DeadInsts, + &getAnalysis<TargetTransformInfo>()); if (numFolded) { Changed = true; DeleteTriviallyDeadInstructions(DeadInsts); |