Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp')
 contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 706
 1 file changed, 542 insertions(+), 164 deletions(-)
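Three hidden cl::opt flags are introduced by this patch: -lsr-insns-cost (adds an instruction count to the cost model), -lsr-exp-narrow (narrows complex solutions by expected register count), and -lsr-filter-same-scaled-reg (on by default; filters non-optimal formulae sharing a ScaledReg/Scale pair). Being cl::Hidden they do not appear in -help, but assuming a stock opt built from this tree they can still be toggled when running the pass standalone, e.g.:

opt -S -loop-reduce -lsr-insns-cost test.ll -o out.ll
opt -S -loop-reduce -lsr-exp-narrow test.ll -o out.ll
opt -S -loop-reduce -lsr-filter-same-scaled-reg=false test.ll -o out.ll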
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 194587a..3638da1 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -129,6 +129,24 @@ static cl::opt<bool> EnablePhiElim( "enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination")); +// The flag adds instruction count to solutions cost comparision. +static cl::opt<bool> InsnsCost( + "lsr-insns-cost", cl::Hidden, cl::init(false), + cl::desc("Add instruction count to a LSR cost model")); + +// Flag to choose how to narrow complex lsr solution +static cl::opt<bool> LSRExpNarrow( + "lsr-exp-narrow", cl::Hidden, cl::init(false), + cl::desc("Narrow LSR complex solution using" + " expectation of registers number")); + +// Flag to narrow search space by filtering non-optimal formulae with +// the same ScaledReg and Scale. +static cl::opt<bool> FilterSameScaledReg( + "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), + cl::desc("Narrow LSR search space by filtering non-optimal formulae" + " with the same ScaledReg and Scale")); + #ifndef NDEBUG // Stress test IV chain generation. static cl::opt<bool> StressIVChain( @@ -181,10 +199,11 @@ void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } -LLVM_DUMP_METHOD -void RegSortData::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegSortData::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -295,9 +314,13 @@ struct Formula { /// canonical representation of a formula is /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). + /// 3. The reg containing recurrent expr related with currect loop in the + /// formula should be put in the ScaledReg. /// #1 enforces that the scaled register is always used when at least two /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. /// #2 enforces that 1 * reg is reg. + /// #3 ensures invariant regs with respect to current loop can be combined + /// together in LSR codegen. /// This invariant can be temporarly broken while building a formula. /// However, every formula inserted into the LSRInstance must be in canonical /// form. @@ -318,12 +341,14 @@ struct Formula { void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); - bool isCanonical() const; + bool isCanonical(const Loop &L) const; - void canonicalize(); + void canonicalize(const Loop &L); bool unscale(); + bool hasZeroEnd() const; + size_t getNumRegs() const; Type *getType() const; @@ -410,16 +435,35 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { BaseRegs.push_back(Sum); HasBaseReg = true; } - canonicalize(); + canonicalize(*L); } /// \brief Check whether or not this formula statisfies the canonical /// representation. /// \see Formula::BaseRegs. 
-bool Formula::isCanonical() const { - if (ScaledReg) - return Scale != 1 || !BaseRegs.empty(); - return BaseRegs.size() <= 1; +bool Formula::isCanonical(const Loop &L) const { + if (!ScaledReg) + return BaseRegs.size() <= 1; + + if (Scale != 1) + return true; + + if (Scale == 1 && BaseRegs.empty()) + return false; + + const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg); + if (SAR && SAR->getLoop() == &L) + return true; + + // If ScaledReg is not a recurrent expr, or it is but its loop is not current + // loop, meanwhile BaseRegs contains a recurrent expr reg related with current + // loop, we want to swap the reg in BaseRegs with ScaledReg. + auto I = + find_if(make_range(BaseRegs.begin(), BaseRegs.end()), [&](const SCEV *S) { + return isa<const SCEVAddRecExpr>(S) && + (cast<SCEVAddRecExpr>(S)->getLoop() == &L); + }); + return I == BaseRegs.end(); } /// \brief Helper method to morph a formula into its canonical representation. @@ -428,21 +472,33 @@ bool Formula::isCanonical() const { /// field. Otherwise, we would have to do special cases everywhere in LSR /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... /// On the other hand, 1*reg should be canonicalized into reg. -void Formula::canonicalize() { - if (isCanonical()) +void Formula::canonicalize(const Loop &L) { + if (isCanonical(L)) return; // So far we did not need this case. This is easy to implement but it is // useless to maintain dead code. Beside it could hurt compile time. assert(!BaseRegs.empty() && "1*reg => reg, should not be needed."); + // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg. - ScaledReg = BaseRegs.back(); - BaseRegs.pop_back(); - Scale = 1; - size_t BaseRegsSize = BaseRegs.size(); - size_t Try = 0; - // If ScaledReg is an invariant, try to find a variant expression. - while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg)) - std::swap(ScaledReg, BaseRegs[Try++]); + if (!ScaledReg) { + ScaledReg = BaseRegs.back(); + BaseRegs.pop_back(); + Scale = 1; + } + + // If ScaledReg is an invariant with respect to L, find the reg from + // BaseRegs containing the recurrent expr related with Loop L. Swap the + // reg with ScaledReg. + const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg); + if (!SAR || SAR->getLoop() != &L) { + auto I = find_if(make_range(BaseRegs.begin(), BaseRegs.end()), + [&](const SCEV *S) { + return isa<const SCEVAddRecExpr>(S) && + (cast<SCEVAddRecExpr>(S)->getLoop() == &L); + }); + if (I != BaseRegs.end()) + std::swap(ScaledReg, *I); + } } /// \brief Get rid of the scale in the formula. @@ -458,6 +514,14 @@ bool Formula::unscale() { return true; } +bool Formula::hasZeroEnd() const { + if (UnfoldedOffset || BaseOffset) + return false; + if (BaseRegs.size() != 1 || ScaledReg) + return false; + return true; +} + /// Return the total number of register operands used by this formula. This does /// not include register uses implied by non-constant addrec strides. size_t Formula::getNumRegs() const { @@ -534,10 +598,11 @@ void Formula::print(raw_ostream &OS) const { } } -LLVM_DUMP_METHOD -void Formula::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void Formula::dump() const { print(errs()); errs() << '\n'; } +#endif /// Return true if the given addrec can be sign-extended without changing its /// value. 
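The reworked canonicalize() is easiest to see on a concrete formula. Below is a minimal standalone model of the new rule #3, using toy Reg/Formula types rather than the LLVM ones and omitting the isCanonical() early exit: when ScaledReg is invariant in the current loop but some base register is a recurrence of that loop, the two are swapped so the loop-variant part lands in ScaledReg.

#include <algorithm>
#include <cassert>
#include <vector>

// Toy stand-in for a SCEV: a register is either a recurrence of the
// current loop L or invariant with respect to it.
struct Reg { const char *Name; bool RecurrentInL; };

struct Formula {
  Reg *ScaledReg = nullptr;
  int Scale = 0;
  std::vector<Reg *> BaseRegs;

  void canonicalize() {
    // Rule #1: with several registers, one must be the scaled register.
    if (!ScaledReg) {
      ScaledReg = BaseRegs.back();
      BaseRegs.pop_back();
      Scale = 1;
    }
    // Rule #3: prefer a recurrence of the current loop in ScaledReg.
    if (!ScaledReg->RecurrentInL) {
      auto I = std::find_if(BaseRegs.begin(), BaseRegs.end(),
                            [](Reg *R) { return R->RecurrentInL; });
      if (I != BaseRegs.end())
        std::swap(ScaledReg, *I);
    }
  }
};

int main() {
  Reg A{"a", false}, IV{"{0,+,1}<L>", true};
  Formula F;
  F.ScaledReg = &A;   // non-canonical: invariant expr in ScaledReg...
  F.Scale = 1;
  F.BaseRegs = {&IV}; // ...while the addrec of L sits in BaseRegs
  F.canonicalize();
  assert(F.ScaledReg == &IV && F.BaseRegs[0] == &A);
}

This is what lets registers invariant with respect to the current loop be combined during LSR codegen, as the updated comment on the canonical form notes.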
@@ -711,7 +776,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { static bool isAddressUse(Instruction *Inst, Value *OperandVal) { bool isAddress = isa<LoadInst>(Inst); if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - if (SI->getOperand(1) == OperandVal) + if (SI->getPointerOperand() == OperandVal) isAddress = true; } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Addressing modes can also be folded into prefetches and a variety @@ -723,6 +788,12 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { isAddress = true; break; } + } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) { + if (RMW->getPointerOperand() == OperandVal) + isAddress = true; + } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) { + if (CmpX->getPointerOperand() == OperandVal) + isAddress = true; } return isAddress; } @@ -735,6 +806,10 @@ static MemAccessTy getAccessType(const Instruction *Inst) { AccessTy.AddrSpace = SI->getPointerAddressSpace(); } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { AccessTy.AddrSpace = LI->getPointerAddressSpace(); + } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) { + AccessTy.AddrSpace = RMW->getPointerAddressSpace(); + } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) { + AccessTy.AddrSpace = CmpX->getPointerAddressSpace(); } // All pointers have the same requirements, so canonicalize them to an @@ -832,7 +907,7 @@ static bool isHighCostExpansion(const SCEV *S, /// If any of the instructions is the specified set are trivially dead, delete /// them and see if this makes any of their operands subsequently dead. static bool -DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { @@ -875,44 +950,44 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F); // Get the cost of the scaling factor used in F for LU. static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, - const LSRUse &LU, const Formula &F); + const LSRUse &LU, const Formula &F, + const Loop &L); namespace { /// This class is used to measure and compare candidate formulae. class Cost { - /// TODO: Some of these could be merged. Also, a lexical ordering - /// isn't always optimal. - unsigned NumRegs; - unsigned AddRecCost; - unsigned NumIVMuls; - unsigned NumBaseAdds; - unsigned ImmCost; - unsigned SetupCost; - unsigned ScaleCost; + TargetTransformInfo::LSRCost C; public: - Cost() - : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), - SetupCost(0), ScaleCost(0) {} + Cost() { + C.Insns = 0; + C.NumRegs = 0; + C.AddRecCost = 0; + C.NumIVMuls = 0; + C.NumBaseAdds = 0; + C.ImmCost = 0; + C.SetupCost = 0; + C.ScaleCost = 0; + } - bool operator<(const Cost &Other) const; + bool isLess(Cost &Other, const TargetTransformInfo &TTI); void Lose(); #ifndef NDEBUG // Once any of the metrics loses, they must all remain losers. 
bool isValid() { - return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds - | ImmCost | SetupCost | ScaleCost) != ~0u) - || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds - & ImmCost & SetupCost & ScaleCost) == ~0u); + return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds + | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u) + || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds + & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u); } #endif bool isLoser() { assert(isValid() && "invalid cost"); - return NumRegs == ~0u; + return C.NumRegs == ~0u; } void RateFormula(const TargetTransformInfo &TTI, @@ -1067,7 +1142,8 @@ public: } bool HasFormulaWithSameRegs(const Formula &F) const; - bool InsertFormula(const Formula &F); + float getNotSelectedProbability(const SCEV *Reg) const; + bool InsertFormula(const Formula &F, const Loop &L); void DeleteFormula(Formula &F); void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); @@ -1083,20 +1159,26 @@ void Cost::RateRegister(const SCEV *Reg, const Loop *L, ScalarEvolution &SE, DominatorTree &DT) { if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { - // If this is an addrec for another loop, don't second-guess its addrec phi - // nodes. LSR isn't currently smart enough to reason about more than one - // loop at a time. LSR has already run on inner loops, will not run on outer - // loops, and cannot be expected to change sibling loops. + // If this is an addrec for another loop, it should be an invariant + // with respect to L since L is the innermost loop (at least + // for now LSR only handles innermost loops). if (AR->getLoop() != L) { // If the AddRec exists, consider it's register free and leave it alone. if (isExistingPhi(AR, SE)) return; - // Otherwise, do not consider this formula at all. - Lose(); + // It is bad to allow LSR for current loop to add induction variables + // for its sibling loops. + if (!AR->getLoop()->contains(L)) { + Lose(); + return; + } + + // Otherwise, it will be an invariant with respect to Loop L. + ++C.NumRegs; return; } - AddRecCost += 1; /// TODO: This should be a function of the stride. + C.AddRecCost += 1; /// TODO: This should be a function of the stride. // Add the step value register, if it needs one. // TODO: The non-affine case isn't precisely modeled here. @@ -1108,7 +1190,7 @@ void Cost::RateRegister(const SCEV *Reg, } } } - ++NumRegs; + ++C.NumRegs; // Rough heuristic; favor registers which don't require extra setup // instructions in the preheader. @@ -1117,9 +1199,9 @@ void Cost::RateRegister(const SCEV *Reg, !(isa<SCEVAddRecExpr>(Reg) && (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) - ++SetupCost; + ++C.SetupCost; - NumIVMuls += isa<SCEVMulExpr>(Reg) && + C.NumIVMuls += isa<SCEVMulExpr>(Reg) && SE.hasComputableLoopEvolution(Reg, L); } @@ -1150,8 +1232,11 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSetImpl<const SCEV *> *LoserRegs) { - assert(F.isCanonical() && "Cost is accurate only for canonical formula"); + assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula"); // Tally up the registers. 
+ unsigned PrevAddRecCost = C.AddRecCost; + unsigned PrevNumRegs = C.NumRegs; + unsigned PrevNumBaseAdds = C.NumBaseAdds; if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { Lose(); @@ -1176,73 +1261,113 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, if (NumBaseParts > 1) // Do not count the base and a possible second register if the target // allows to fold 2 registers. - NumBaseAdds += + C.NumBaseAdds += NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); - NumBaseAdds += (F.UnfoldedOffset != 0); + C.NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. - ScaleCost += getScalingFactorCost(TTI, LU, F); + C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L); // Tally up the non-zero immediates. for (const LSRFixup &Fixup : LU.Fixups) { int64_t O = Fixup.Offset; int64_t Offset = (uint64_t)O + F.BaseOffset; if (F.BaseGV) - ImmCost += 64; // Handle symbolic values conservatively. + C.ImmCost += 64; // Handle symbolic values conservatively. // TODO: This should probably be the pointer size. else if (Offset != 0) - ImmCost += APInt(64, Offset, true).getMinSignedBits(); + C.ImmCost += APInt(64, Offset, true).getMinSignedBits(); // Check with target if this offset with this instruction is // specifically not supported. if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) && !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset)) - NumBaseAdds++; + C.NumBaseAdds++; + } + + // If we don't count instruction cost exit here. + if (!InsnsCost) { + assert(isValid() && "invalid cost"); + return; + } + + // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as + // additional instruction (at least fill). + unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1; + if (C.NumRegs > TTIRegNum) { + // Cost already exceeded TTIRegNum, then only newly added register can add + // new instructions. + if (PrevNumRegs > TTIRegNum) + C.Insns += (C.NumRegs - PrevNumRegs); + else + C.Insns += (C.NumRegs - TTIRegNum); } + + // If ICmpZero formula ends with not 0, it could not be replaced by + // just add or sub. We'll need to compare final result of AddRec. + // That means we'll need an additional instruction. + // For -10 + {0, +, 1}: + // i = i + 1; + // cmp i, 10 + // + // For {-10, +, 1}: + // i = i + 1; + if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd()) + C.Insns++; + // Each new AddRec adds 1 instruction to calculation. + C.Insns += (C.AddRecCost - PrevAddRecCost); + + // BaseAdds adds instructions for unfolded registers. + if (LU.Kind != LSRUse::ICmpZero) + C.Insns += C.NumBaseAdds - PrevNumBaseAdds; assert(isValid() && "invalid cost"); } /// Set this cost to a losing value. void Cost::Lose() { - NumRegs = ~0u; - AddRecCost = ~0u; - NumIVMuls = ~0u; - NumBaseAdds = ~0u; - ImmCost = ~0u; - SetupCost = ~0u; - ScaleCost = ~0u; + C.Insns = ~0u; + C.NumRegs = ~0u; + C.AddRecCost = ~0u; + C.NumIVMuls = ~0u; + C.NumBaseAdds = ~0u; + C.ImmCost = ~0u; + C.SetupCost = ~0u; + C.ScaleCost = ~0u; } /// Choose the lower cost. 
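The register-pressure component of the new instruction-count metric charges one instruction (at least a fill) for every register beyond TTI.getNumberOfRegisters(false) - 1, counting only the newly added registers once the budget was already exceeded. A standalone sketch of just that rule, with toy numbers in place of real TTI queries:

#include <iostream>

// PrevNumRegs/NumRegs mirror C.NumRegs before and after rating the
// formula; TargetRegs stands in for TTI.getNumberOfRegisters(false).
unsigned extraInsns(unsigned PrevNumRegs, unsigned NumRegs,
                    unsigned TargetRegs) {
  unsigned TTIRegNum = TargetRegs - 1;
  if (NumRegs <= TTIRegNum)
    return 0;
  // Already over budget: only the newly added registers add instructions.
  if (PrevNumRegs > TTIRegNum)
    return NumRegs - PrevNumRegs;
  return NumRegs - TTIRegNum;
}

int main() {
  std::cout << extraInsns(4, 9, 8) << '\n'; // budget 7, now 9 regs -> 2
  std::cout << extraInsns(8, 9, 8) << '\n'; // was already over budget -> 1
}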
-bool Cost::operator<(const Cost &Other) const { - return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost, - ImmCost, SetupCost) < - std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls, - Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost, - Other.SetupCost); +bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) { + if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && + C.Insns != Other.C.Insns) + return C.Insns < Other.C.Insns; + return TTI.isLSRCostLess(C, Other.C); } void Cost::print(raw_ostream &OS) const { - OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s"); - if (AddRecCost != 0) - OS << ", with addrec cost " << AddRecCost; - if (NumIVMuls != 0) - OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s"); - if (NumBaseAdds != 0) - OS << ", plus " << NumBaseAdds << " base add" - << (NumBaseAdds == 1 ? "" : "s"); - if (ScaleCost != 0) - OS << ", plus " << ScaleCost << " scale cost"; - if (ImmCost != 0) - OS << ", plus " << ImmCost << " imm cost"; - if (SetupCost != 0) - OS << ", plus " << SetupCost << " setup cost"; + if (InsnsCost) + OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s "); + OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s"); + if (C.AddRecCost != 0) + OS << ", with addrec cost " << C.AddRecCost; + if (C.NumIVMuls != 0) + OS << ", plus " << C.NumIVMuls << " IV mul" + << (C.NumIVMuls == 1 ? "" : "s"); + if (C.NumBaseAdds != 0) + OS << ", plus " << C.NumBaseAdds << " base add" + << (C.NumBaseAdds == 1 ? "" : "s"); + if (C.ScaleCost != 0) + OS << ", plus " << C.ScaleCost << " scale cost"; + if (C.ImmCost != 0) + OS << ", plus " << C.ImmCost << " imm cost"; + if (C.SetupCost != 0) + OS << ", plus " << C.SetupCost << " setup cost"; } -LLVM_DUMP_METHOD -void Cost::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void Cost::dump() const { print(errs()); errs() << '\n'; } +#endif LSRFixup::LSRFixup() : UserInst(nullptr), OperandValToReplace(nullptr), @@ -1285,10 +1410,11 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", Offset=" << Offset; } -LLVM_DUMP_METHOD -void LSRFixup::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRFixup::dump() const { print(errs()); errs() << '\n'; } +#endif /// Test whether this use as a formula which has the same registers as the given /// formula. @@ -1300,10 +1426,19 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { return Uniquifier.count(Key); } +/// The function returns a probability of selecting formula without Reg. +float LSRUse::getNotSelectedProbability(const SCEV *Reg) const { + unsigned FNum = 0; + for (const Formula &F : Formulae) + if (F.referencesReg(Reg)) + FNum++; + return ((float)(Formulae.size() - FNum)) / Formulae.size(); +} + /// If the given formula has not yet been inserted, add it to the list, and /// return true. Return false otherwise. The formula must be in canonical form. 
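With -lsr-insns-cost given explicitly, the instruction count dominates the comparison; otherwise Cost::isLess defers to the target hook TTI.isLSRCostLess. The default hook is believed to preserve the lexicographic order of the deleted operator<, rendered standalone below (an assumption about TargetTransformInfoImplBase, not quoted source; targets may override it):

#include <tuple>

struct LSRCost {
  unsigned Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost,
      SetupCost, ScaleCost;
};

// Note that Insns is absent from this ordering, which is why isLess()
// has to compare it separately when the flag is set.
bool defaultIsLSRCostLess(const LSRCost &C1, const LSRCost &C2) {
  return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                  C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
         std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                  C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}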
-bool LSRUse::InsertFormula(const Formula &F) { - assert(F.isCanonical() && "Invalid canonical representation"); +bool LSRUse::InsertFormula(const Formula &F, const Loop &L) { + assert(F.isCanonical(L) && "Invalid canonical representation"); if (!Formulae.empty() && RigidFormula) return false; @@ -1391,10 +1526,11 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", widest fixup type: " << *WidestFixupType; } -LLVM_DUMP_METHOD -void LSRUse::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRUse::dump() const { print(errs()); errs() << '\n'; } +#endif static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, @@ -1472,7 +1608,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, - const Formula &F) { + const Formula &F, const Loop &L) { // For the purpose of isAMCompletelyFolded either having a canonical formula // or a scale not equal to zero is correct. // Problems may arise from non canonical formulae having a scale == 0. @@ -1480,7 +1616,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, // However, when we generate the scaled formulae, we first check that the // scaling factor is profitable before computing the actual ScaledReg for // compile time sake. - assert((F.isCanonical() || F.Scale != 0)); + assert((F.isCanonical(L) || F.Scale != 0)); return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); } @@ -1515,14 +1651,15 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, } static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, - const LSRUse &LU, const Formula &F) { + const LSRUse &LU, const Formula &F, + const Loop &L) { if (!F.Scale) return 0; // If the use is not completely folded in that instruction, we will have to // pay an extra cost only for scale != 1. 
if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, F)) + LU.AccessTy, F, L)) return F.Scale != 1; switch (LU.Kind) { @@ -1718,7 +1855,7 @@ class LSRInstance { void FinalizeChain(IVChain &Chain); void CollectChains(); void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts); + SmallVectorImpl<WeakTrackingVH> &DeadInsts); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1772,6 +1909,8 @@ class LSRInstance { void NarrowSearchSpaceByDetectingSupersets(); void NarrowSearchSpaceByCollapsingUnrolledCode(); void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); + void NarrowSearchSpaceByDeletingCostlyFormulas(); void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); @@ -1792,19 +1931,15 @@ class LSRInstance { const LSRUse &LU, SCEVExpander &Rewriter) const; - Value *Expand(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, + BasicBlock::iterator IP, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; - void Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; + void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); public: @@ -2191,7 +2326,7 @@ LSRInstance::OptimizeLoopTermCond() { dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { const ConstantInt *C = D->getValue(); // Stride of one or negative one can have reuse with non-addresses. - if (C->isOne() || C->isAllOnesValue()) + if (C->isOne() || C->isMinusOne()) goto decline_post_inc; // Avoid weird situations. if (C->getValue().getMinSignedBits() >= 64 || @@ -2492,7 +2627,12 @@ static Value *getWideOperand(Value *Oper) { static bool isCompatibleIVType(Value *LVal, Value *RVal) { Type *LType = LVal->getType(); Type *RType = RVal->getType(); - return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy()); + return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() && + // Different address spaces means (possibly) + // different types of the pointer implementation, + // e.g. i16 vs i32 so disallow that. + (LType->getPointerAddressSpace() == + RType->getPointerAddressSpace())); } /// Return an approximation of this SCEV expression's "base", or NULL for any @@ -2881,7 +3021,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. 
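The isCompatibleIVType tightening above exists because a DataLayout may give pointers different widths in different address spaces, so an IV over one cannot stand in for a pointer in the other. A small illustration (assumes LLVM headers and libraries are available; the layout string is a hypothetical one, not taken from this patch):

#include "llvm/IR/DataLayout.h"
#include <iostream>

int main() {
  // Default pointers 32-bit, addrspace(1) pointers 16-bit.
  llvm::DataLayout DL("p:32:32-p1:16:16");
  std::cout << DL.getPointerSizeInBits(0) << '\n'; // 32
  std::cout << DL.getPointerSizeInBits(1) << '\n'; // 16
}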
const IVInc &Head = Chain.Incs[0]; @@ -2989,8 +3129,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { User::op_iterator UseI = find(UserInst->operands(), U.getOperandValToReplace()); assert(UseI != UserInst->op_end() && "cannot find IV operand"); - if (IVIncSet.count(UseI)) + if (IVIncSet.count(UseI)) { + DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n'); continue; + } LSRUse::KindType Kind = LSRUse::Basic; MemAccessTy AccessTy; @@ -3025,8 +3167,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. - N = TransformForPostIncUse(Normalize, N, CI, nullptr, - TmpPostIncLoops, SE, DT); + N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); } @@ -3108,7 +3249,8 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { // Do not insert formula that we will not be able to expand. assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && "Formula is illegal"); - if (!LU.InsertFormula(F)) + + if (!LU.InsertFormula(F, *L)) return false; CountRegisters(F, LUIdx); @@ -3347,7 +3489,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, F.BaseRegs.push_back(*J); // We may have changed the number of register in base regs, adjust the // formula accordingly. - F.canonicalize(); + F.canonicalize(*L); if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like @@ -3359,7 +3501,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, /// Split out subexpressions from adds and the bases of addrecs. void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, unsigned Depth) { - assert(Base.isCanonical() && "Input must be in the canonical form"); + assert(Base.isCanonical(*L) && "Input must be in the canonical form"); // Arbitrarily cap recursion to protect compile time. if (Depth >= 3) return; @@ -3400,7 +3542,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // rather than proceed with zero in a register. if (!Sum->isZero()) { F.BaseRegs.push_back(Sum); - F.canonicalize(); + F.canonicalize(*L); (void)InsertFormula(LU, LUIdx, F); } } @@ -3457,7 +3599,7 @@ void LSRInstance::GenerateConstantOffsetsImpl( F.ScaledReg = nullptr; } else F.deleteBaseReg(F.BaseRegs[Idx]); - F.canonicalize(); + F.canonicalize(*L); } else if (IsScaledReg) F.ScaledReg = NewG; else @@ -3620,10 +3762,10 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV) continue; - // For each addrec base reg, apply the scale, if possible. - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) - if (const SCEVAddRecExpr *AR = - dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) { + // For each addrec base reg, if its loop is current loop, apply the scale. + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]); + if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) { const SCEV *FactorS = SE.getConstant(IntTy, Factor); if (FactorS->isZero()) continue; @@ -3637,11 +3779,17 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // The canonical representation of 1*reg is reg, which is already in // Base. 
In that case, do not try to insert the formula, it will be // rejected anyway. - if (F.Scale == 1 && F.BaseRegs.empty()) + if (F.Scale == 1 && (F.BaseRegs.empty() || + (AR->getLoop() != L && LU.AllFixupsOutsideLoop))) continue; + // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate + // non canonical Formula with ScaledReg's loop not being L. + if (F.Scale == 1 && LU.AllFixupsOutsideLoop) + F.canonicalize(*L); (void)InsertFormula(LU, LUIdx, F); } } + } } } @@ -3668,6 +3816,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses)) continue; + F.canonicalize(*L); (void)InsertFormula(LU, LUIdx, F); } } @@ -3697,10 +3846,11 @@ void WorkItem::print(raw_ostream &OS) const { << " , add offset " << Imm; } -LLVM_DUMP_METHOD -void WorkItem::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void WorkItem::dump() const { print(errs()); errs() << '\n'; } +#endif /// Look for registers which are a constant distance apart and try to form reuse /// opportunities between them. @@ -3764,8 +3914,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // Compute the difference between the two. int64_t Imm = (uint64_t)JImm - M->first; - for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1; - LUIdx = UsedByIndices.find_next(LUIdx)) + for (unsigned LUIdx : UsedByIndices.set_bits()) // Make a memo of this use, offset, and register tuple. if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second) WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg)); @@ -3821,7 +3970,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { continue; // OK, looks good. - NewF.canonicalize(); + NewF.canonicalize(*this->L); (void)InsertFormula(LU, LUIdx, NewF); } else { // Use the immediate in a base register. @@ -3853,7 +4002,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { goto skip_formula; // Ok, looks good. - NewF.canonicalize(); + NewF.canonicalize(*this->L); (void)InsertFormula(LU, LUIdx, NewF); break; skip_formula:; @@ -3967,7 +4116,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { Cost CostBest; Regs.clear(); CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU); - if (CostF < CostBest) + if (CostF.isLess(CostBest, TTI)) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" @@ -4165,6 +4314,242 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ } } +/// If a LSRUse has multiple formulae with the same ScaledReg and Scale. +/// Pick the best one and delete the others. +/// This narrowing heuristic is to keep as many formulae with different +/// Scale and ScaledReg pair as possible while narrowing the search space. +/// The benefit is that it is more likely to find out a better solution +/// from a formulae set with more Scale and ScaledReg variations than +/// a formulae set with the same Scale and ScaledReg. The picking winner +/// reg heurstic will often keep the formulae with the same Scale and +/// ScaledReg and filter others, and we want to avoid that if possible. +void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; + + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by choosing the best Formula " + "from the Formulae with the same Scale and ScaledReg.\n"); + + // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. 
+ typedef DenseMap<std::pair<const SCEV *, int64_t>, size_t> BestFormulaeTy; + BestFormulaeTy BestFormulae; +#ifndef NDEBUG + bool ChangedFormulae = false; +#endif + DenseSet<const SCEV *> VisitedRegs; + SmallPtrSet<const SCEV *, 16> Regs; + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); + + // Return true if Formula FA is better than Formula FB. + auto IsBetterThan = [&](Formula &FA, Formula &FB) { + // First we will try to choose the Formula with fewer new registers. + // For a register used by current Formula, the more the register is + // shared among LSRUses, the less we increase the register number + // counter of the formula. + size_t FARegNum = 0; + for (const SCEV *Reg : FA.BaseRegs) { + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); + FARegNum += (NumUses - UsedByIndices.count() + 1); + } + size_t FBRegNum = 0; + for (const SCEV *Reg : FB.BaseRegs) { + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); + FBRegNum += (NumUses - UsedByIndices.count() + 1); + } + if (FARegNum != FBRegNum) + return FARegNum < FBRegNum; + + // If the new register numbers are the same, choose the Formula with + // less Cost. + Cost CostFA, CostFB; + Regs.clear(); + CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU); + Regs.clear(); + CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU); + return CostFA.isLess(CostFB, TTI); + }; + + bool Any = false; + for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; + ++FIdx) { + Formula &F = LU.Formulae[FIdx]; + if (!F.ScaledReg) + continue; + auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx}); + if (P.second) + continue; + + Formula &Best = LU.Formulae[P.first->second]; + if (IsBetterThan(F, Best)) + std::swap(F, Best); + DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); + dbgs() << "\n" + " in favor of formula "; + Best.print(dbgs()); dbgs() << '\n'); +#ifndef NDEBUG + ChangedFormulae = true; +#endif + LU.DeleteFormula(F); + --FIdx; + --NumForms; + Any = true; + } + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); + + // Reset this to prepare for the next use. + BestFormulae.clear(); + } + + DEBUG(if (ChangedFormulae) { + dbgs() << "\n" + "After filtering out undesirable candidates:\n"; + print_uses(dbgs()); + }); +} + +/// The function delete formulas with high registers number expectation. +/// Assuming we don't know the value of each formula (already delete +/// all inefficient), generate probability of not selecting for each +/// register. +/// For example, +/// Use1: +/// reg(a) + reg({0,+,1}) +/// reg(a) + reg({-1,+,1}) + 1 +/// reg({a,+,1}) +/// Use2: +/// reg(b) + reg({0,+,1}) +/// reg(b) + reg({-1,+,1}) + 1 +/// reg({b,+,1}) +/// Use3: +/// reg(c) + reg(b) + reg({0,+,1}) +/// reg(c) + reg({b,+,1}) +/// +/// Probability of not selecting +/// Use1 Use2 Use3 +/// reg(a) (1/3) * 1 * 1 +/// reg(b) 1 * (1/3) * (1/2) +/// reg({0,+,1}) (2/3) * (2/3) * (1/2) +/// reg({-1,+,1}) (2/3) * (2/3) * 1 +/// reg({a,+,1}) (2/3) * 1 * 1 +/// reg({b,+,1}) 1 * (2/3) * (2/3) +/// reg(c) 1 * 1 * 0 +/// +/// Now count registers number mathematical expectation for each formula: +/// Note that for each use we exclude probability if not selecting for the use. +/// For example for Use1 probability for reg(a) would be just 1 * 1 (excluding +/// probabilty 1/3 of not selecting for Use1). 
+/// Use1: +/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted +/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted +/// reg({a,+,1}) 1 +/// Use2: +/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted +/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted +/// reg({b,+,1}) 2/3 +/// Use3: +/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted +/// reg(c) + reg({b,+,1}) 1 + 2/3 + +void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() { + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; + // Ok, we have too many of formulae on our hands to conveniently handle. + // Use a rough heuristic to thin out the list. + + // Set of Regs wich will be 100% used in final solution. + // Used in each formula of a solution (in example above this is reg(c)). + // We can skip them in calculations. + SmallPtrSet<const SCEV *, 4> UniqRegs; + DEBUG(dbgs() << "The search space is too complex.\n"); + + // Map each register to probability of not selecting + DenseMap <const SCEV *, float> RegNumMap; + for (const SCEV *Reg : RegUses) { + if (UniqRegs.count(Reg)) + continue; + float PNotSel = 1; + for (const LSRUse &LU : Uses) { + if (!LU.Regs.count(Reg)) + continue; + float P = LU.getNotSelectedProbability(Reg); + if (P != 0.0) + PNotSel *= P; + else + UniqRegs.insert(Reg); + } + RegNumMap.insert(std::make_pair(Reg, PNotSel)); + } + + DEBUG(dbgs() << "Narrowing the search space by deleting costly formulas\n"); + + // Delete formulas where registers number expectation is high. + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + // If nothing to delete - continue. + if (LU.Formulae.size() < 2) + continue; + // This is temporary solution to test performance. Float should be + // replaced with round independent type (based on integers) to avoid + // different results for different target builds. + float FMinRegNum = LU.Formulae[0].getNumRegs(); + float FMinARegNum = LU.Formulae[0].getNumRegs(); + size_t MinIdx = 0; + for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { + Formula &F = LU.Formulae[i]; + float FRegNum = 0; + float FARegNum = 0; + for (const SCEV *BaseReg : F.BaseRegs) { + if (UniqRegs.count(BaseReg)) + continue; + FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg); + if (isa<SCEVAddRecExpr>(BaseReg)) + FARegNum += + RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg); + } + if (const SCEV *ScaledReg = F.ScaledReg) { + if (!UniqRegs.count(ScaledReg)) { + FRegNum += + RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg); + if (isa<SCEVAddRecExpr>(ScaledReg)) + FARegNum += + RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg); + } + } + if (FMinRegNum > FRegNum || + (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) { + FMinRegNum = FRegNum; + FMinARegNum = FARegNum; + MinIdx = i; + } + } + DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs()); + dbgs() << " with min reg num " << FMinRegNum << '\n'); + if (MinIdx != 0) + std::swap(LU.Formulae[MinIdx], LU.Formulae[0]); + while (LU.Formulae.size() != 1) { + DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs()); + dbgs() << '\n'); + LU.Formulae.pop_back(); + } + LU.RecomputeRegs(LUIdx, RegUses); + assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula"); + Formula &F = LU.Formulae[0]; + DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n'); + // When we choose the formula, the regs become unique. 
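The expectations in the comment block above can be reproduced mechanically: a register's contribution to a formula is its global not-selected probability divided by its not-selected probability within the use at hand, which is exactly what FRegNum accumulates via RegNumMap[Reg] / LU.getNotSelectedProbability(Reg). A toy check of two of the quoted values, with the probabilities copied from the table:

#include <iostream>

int main() {
  // Global probability of NOT selecting each register across Use1..Use3.
  double NotSelA  = 1.0 / 3;  // reg(a):       (1/3) * 1 * 1
  double NotSelB  = 1.0 / 6;  // reg(b):       1 * (1/3) * (1/2)
  double NotSelIV = 2.0 / 9;  // reg({0,+,1}): (2/3) * (2/3) * (1/2)

  // Use1, formula reg(a) + reg({0,+,1}): within Use1, reg(a) is absent
  // from 1 of 3 formulae and reg({0,+,1}) from 2 of 3.
  std::cout << NotSelA / (1.0 / 3) + NotSelIV / (2.0 / 3) << '\n'; // 1 + 1/3

  // Use2, formula reg(b) + reg({0,+,1}).
  std::cout << NotSelB / (1.0 / 3) + NotSelIV / (2.0 / 3) << '\n'; // 1/2 + 1/3
}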
+ UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + if (F.ScaledReg) + UniqRegs.insert(F.ScaledReg); + } + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); +} + + /// Pick a register which seems likely to be profitable, and then in any use /// which has any reference to that register, delete all formulae which do not /// reference that register. @@ -4237,7 +4622,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { NarrowSearchSpaceByDetectingSupersets(); NarrowSearchSpaceByCollapsingUnrolledCode(); NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); - NarrowSearchSpaceByPickingWinnerRegs(); + if (FilterSameScaledReg) + NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); + if (LSRExpNarrow) + NarrowSearchSpaceByDeletingCostlyFormulas(); + else + NarrowSearchSpaceByPickingWinnerRegs(); } /// This is the recursive solver. @@ -4294,7 +4684,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, NewCost = CurCost; NewRegs = CurRegs; NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU); - if (NewCost < SolutionCost) { + if (NewCost.isLess(SolutionCost, TTI)) { Workspace.push_back(&F); if (Workspace.size() != Uses.size()) { SolveRecurse(Solution, SolutionCost, Workspace, NewCost, @@ -4476,12 +4866,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"). -Value *LSRInstance::Expand(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, +Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, BasicBlock::iterator IP, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; @@ -4515,11 +4903,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, assert(!Reg->isZero() && "Zero allocated in a base register!"); // If we're expanding for a post-inc user, make the post-inc adjustment. - PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); - Reg = TransformForPostIncUse(Denormalize, Reg, - LF.UserInst, LF.OperandValToReplace, - Loops, SE, DT); - + Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE); Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr))); } @@ -4530,9 +4914,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, // If we're expanding for a post-inc user, make the post-inc adjustment. PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); - ScaledS = TransformForPostIncUse(Denormalize, ScaledS, - LF.UserInst, LF.OperandValToReplace, - Loops, SE, DT); + ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE); if (LU.Kind == LSRUse::ICmpZero) { // Expand ScaleReg as if it was part of the base regs. @@ -4662,12 +5044,9 @@ Value *LSRInstance::Expand(const LSRUse &LU, /// Helper for Rewrite. PHI nodes are special because the use of their operands /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. 
-void LSRInstance::RewriteForPHI(PHINode *PN, - const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::RewriteForPHI( + PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, + SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { DenseMap<BasicBlock *, Value *> Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -4739,11 +5118,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. -void LSRInstance::Rewrite(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { @@ -4781,7 +5158,7 @@ void LSRInstance::ImplementSolution( const SmallVectorImpl<const Formula *> &Solution) { // Keep track of instructions we may have made dead, so that // we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr"); @@ -4975,10 +5352,11 @@ void LSRInstance::print(raw_ostream &OS) const { print_uses(OS); } -LLVM_DUMP_METHOD -void LSRInstance::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRInstance::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -5030,7 +5408,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Rewriter(SE, DL, "lsr"); #ifndef NDEBUG |