diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 640 |
1 files changed, 350 insertions, 290 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 70bd9d3..194587a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -53,29 +53,64 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <map> +#include <tuple> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "loop-reduce" @@ -123,8 +158,9 @@ struct MemAccessTy { bool operator!=(MemAccessTy Other) const { return !(*this == Other); } - static MemAccessTy getUnknown(LLVMContext &Ctx) { - return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace); + static MemAccessTy getUnknown(LLVMContext &Ctx, + unsigned AS = UnknownAddressSpace) { + return MemAccessTy(Type::getVoidTy(Ctx), AS); } }; @@ -139,7 +175,7 @@ public: void dump() const; }; -} +} // end anonymous namespace void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; @@ -178,7 +214,7 @@ public: const_iterator end() const { return RegSequence.end(); } }; -} +} // end anonymous namespace void RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) { @@ -210,7 +246,7 @@ RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) { SmallBitVector &UsedByIndices = Pair.second.UsedByIndices; if (LUIdx < UsedByIndices.size()) UsedByIndices[LUIdx] = - LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0; + LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false; UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx)); } } @@ -301,7 +337,7 @@ struct Formula { void dump() const; }; -} +} // end anonymous namespace /// Recursion helper for initialMatch. static void DoInitialMatch(const SCEV *S, Loop *L, @@ -323,7 +359,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L, // Look at addrec operands. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - if (!AR->getStart()->isZero()) { + if (!AR->getStart()->isZero() && AR->isAffine()) { DoInitialMatch(AR->getStart(), L, Good, Bad, SE); DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), @@ -446,8 +482,7 @@ void Formula::deleteBaseReg(const SCEV *&S) { /// Test if this formula references the given register. bool Formula::referencesReg(const SCEV *S) const { - return S == ScaledReg || - std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); + return S == ScaledReg || is_contained(BaseRegs, S); } /// Test whether this formula uses registers which are used by uses other than @@ -567,7 +602,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { - if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { + if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) { const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); if (!Step) return nullptr; @@ -822,8 +857,10 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { } namespace { + class LSRUse; -} + +} // end anonymous namespace /// \brief Check if the addressing mode defined by \p F is completely /// folded in \p LU at isel time. @@ -883,7 +920,6 @@ public: SmallPtrSetImpl<const SCEV *> &Regs, const DenseSet<const SCEV *> &VisitedRegs, const Loop *L, - const SmallVectorImpl<int64_t> &Offsets, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr); @@ -902,8 +938,144 @@ private: ScalarEvolution &SE, DominatorTree &DT, SmallPtrSetImpl<const SCEV *> *LoserRegs); }; + +/// An operand value in an instruction which is to be replaced with some +/// equivalent, possibly strength-reduced, replacement. +struct LSRFixup { + /// The instruction which will be updated. + Instruction *UserInst; -} + /// The operand of the instruction which will be replaced. The operand may be + /// used more than once; every instance will be replaced. + Value *OperandValToReplace; + + /// If this user is to use the post-incremented value of an induction + /// variable, this variable is non-null and holds the loop associated with the + /// induction variable. + PostIncLoopSet PostIncLoops; + + /// A constant offset to be added to the LSRUse expression. This allows + /// multiple fixups to share the same LSRUse with different offsets, for + /// example in an unrolled loop. + int64_t Offset; + + bool isUseFullyOutsideLoop(const Loop *L) const; + + LSRFixup(); + + void print(raw_ostream &OS) const; + void dump() const; +}; + +/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted +/// SmallVectors of const SCEV*. +struct UniquifierDenseMapInfo { + static SmallVector<const SCEV *, 4> getEmptyKey() { + SmallVector<const SCEV *, 4> V; + V.push_back(reinterpret_cast<const SCEV *>(-1)); + return V; + } + + static SmallVector<const SCEV *, 4> getTombstoneKey() { + SmallVector<const SCEV *, 4> V; + V.push_back(reinterpret_cast<const SCEV *>(-2)); + return V; + } + + static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) { + return static_cast<unsigned>(hash_combine_range(V.begin(), V.end())); + } + + static bool isEqual(const SmallVector<const SCEV *, 4> &LHS, + const SmallVector<const SCEV *, 4> &RHS) { + return LHS == RHS; + } +}; + +/// This class holds the state that LSR keeps for each use in IVUsers, as well +/// as uses invented by LSR itself. It includes information about what kinds of +/// things can be folded into the user, information about the user itself, and +/// information about how the use may be satisfied. TODO: Represent multiple +/// users of the same expression in common? +class LSRUse { + DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier; + +public: + /// An enum for a kind of use, indicating what types of scaled and immediate + /// operands it might support. + enum KindType { + Basic, ///< A normal use, with no folding. + Special, ///< A special case of basic, allowing -1 scales. + Address, ///< An address use; folding according to TargetLowering + ICmpZero ///< An equality icmp with both operands folded into one. + // TODO: Add a generic icmp too? + }; + + typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair; + + KindType Kind; + MemAccessTy AccessTy; + + /// The list of operands which are to be replaced. + SmallVector<LSRFixup, 8> Fixups; + + /// Keep track of the min and max offsets of the fixups. + int64_t MinOffset; + int64_t MaxOffset; + + /// This records whether all of the fixups using this LSRUse are outside of + /// the loop, in which case some special-case heuristics may be used. + bool AllFixupsOutsideLoop; + + /// RigidFormula is set to true to guarantee that this use will be associated + /// with a single formula--the one that initially matched. Some SCEV + /// expressions cannot be expanded. This allows LSR to consider the registers + /// used by those expressions without the need to expand them later after + /// changing the formula. + bool RigidFormula; + + /// This records the widest use type for any fixup using this + /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max + /// fixup widths to be equivalent, because the narrower one may be relying on + /// the implicit truncation to truncate away bogus bits. + Type *WidestFixupType; + + /// A list of ways to build a value that can satisfy this user. After the + /// list is populated, one of these is selected heuristically and used to + /// formulate a replacement for OperandValToReplace in UserInst. + SmallVector<Formula, 12> Formulae; + + /// The set of register candidates used by all formulae in this LSRUse. + SmallPtrSet<const SCEV *, 4> Regs; + + LSRUse(KindType K, MemAccessTy AT) + : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), + AllFixupsOutsideLoop(true), RigidFormula(false), + WidestFixupType(nullptr) {} + + LSRFixup &getNewFixup() { + Fixups.push_back(LSRFixup()); + return Fixups.back(); + } + + void pushFixup(LSRFixup &f) { + Fixups.push_back(f); + if (f.Offset > MaxOffset) + MaxOffset = f.Offset; + if (f.Offset < MinOffset) + MinOffset = f.Offset; + } + + bool HasFormulaWithSameRegs(const Formula &F) const; + bool InsertFormula(const Formula &F); + void DeleteFormula(Formula &F); + void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); + + void print(raw_ostream &OS) const; + void dump() const; +}; + +} // end anonymous namespace /// Tally up interesting quantities from the given register. void Cost::RateRegister(const SCEV *Reg, @@ -975,7 +1147,6 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, SmallPtrSetImpl<const SCEV *> &Regs, const DenseSet<const SCEV *> &VisitedRegs, const Loop *L, - const SmallVectorImpl<int64_t> &Offsets, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSetImpl<const SCEV *> *LoserRegs) { @@ -1013,13 +1184,20 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, ScaleCost += getScalingFactorCost(TTI, LU, F); // Tally up the non-zero immediates. - for (int64_t O : Offsets) { + for (const LSRFixup &Fixup : LU.Fixups) { + int64_t O = Fixup.Offset; int64_t Offset = (uint64_t)O + F.BaseOffset; if (F.BaseGV) ImmCost += 64; // Handle symbolic values conservatively. // TODO: This should probably be the pointer size. else if (Offset != 0) ImmCost += APInt(64, Offset, true).getMinSignedBits(); + + // Check with target if this offset with this instruction is + // specifically not supported. + if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) && + !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset)) + NumBaseAdds++; } assert(isValid() && "invalid cost"); } @@ -1066,44 +1244,8 @@ void Cost::dump() const { print(errs()); errs() << '\n'; } -namespace { - -/// An operand value in an instruction which is to be replaced with some -/// equivalent, possibly strength-reduced, replacement. -struct LSRFixup { - /// The instruction which will be updated. - Instruction *UserInst; - - /// The operand of the instruction which will be replaced. The operand may be - /// used more than once; every instance will be replaced. - Value *OperandValToReplace; - - /// If this user is to use the post-incremented value of an induction - /// variable, this variable is non-null and holds the loop associated with the - /// induction variable. - PostIncLoopSet PostIncLoops; - - /// The index of the LSRUse describing the expression which this fixup needs, - /// minus an offset (below). - size_t LUIdx; - - /// A constant offset to be added to the LSRUse expression. This allows - /// multiple fixups to share the same LSRUse with different offsets, for - /// example in an unrolled loop. - int64_t Offset; - - bool isUseFullyOutsideLoop(const Loop *L) const; - - LSRFixup(); - - void print(raw_ostream &OS) const; - void dump() const; -}; - -} - LSRFixup::LSRFixup() - : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)), + : UserInst(nullptr), OperandValToReplace(nullptr), Offset(0) {} /// Test whether this fixup always uses its value outside of the given loop. @@ -1139,9 +1281,6 @@ void LSRFixup::print(raw_ostream &OS) const { PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false); } - if (LUIdx != ~size_t(0)) - OS << ", LUIdx=" << LUIdx; - if (Offset != 0) OS << ", Offset=" << Offset; } @@ -1151,102 +1290,6 @@ void LSRFixup::dump() const { print(errs()); errs() << '\n'; } -namespace { - -/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted -/// SmallVectors of const SCEV*. -struct UniquifierDenseMapInfo { - static SmallVector<const SCEV *, 4> getEmptyKey() { - SmallVector<const SCEV *, 4> V; - V.push_back(reinterpret_cast<const SCEV *>(-1)); - return V; - } - - static SmallVector<const SCEV *, 4> getTombstoneKey() { - SmallVector<const SCEV *, 4> V; - V.push_back(reinterpret_cast<const SCEV *>(-2)); - return V; - } - - static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) { - return static_cast<unsigned>(hash_combine_range(V.begin(), V.end())); - } - - static bool isEqual(const SmallVector<const SCEV *, 4> &LHS, - const SmallVector<const SCEV *, 4> &RHS) { - return LHS == RHS; - } -}; - -/// This class holds the state that LSR keeps for each use in IVUsers, as well -/// as uses invented by LSR itself. It includes information about what kinds of -/// things can be folded into the user, information about the user itself, and -/// information about how the use may be satisfied. TODO: Represent multiple -/// users of the same expression in common? -class LSRUse { - DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier; - -public: - /// An enum for a kind of use, indicating what types of scaled and immediate - /// operands it might support. - enum KindType { - Basic, ///< A normal use, with no folding. - Special, ///< A special case of basic, allowing -1 scales. - Address, ///< An address use; folding according to TargetLowering - ICmpZero ///< An equality icmp with both operands folded into one. - // TODO: Add a generic icmp too? - }; - - typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair; - - KindType Kind; - MemAccessTy AccessTy; - - SmallVector<int64_t, 8> Offsets; - int64_t MinOffset; - int64_t MaxOffset; - - /// This records whether all of the fixups using this LSRUse are outside of - /// the loop, in which case some special-case heuristics may be used. - bool AllFixupsOutsideLoop; - - /// RigidFormula is set to true to guarantee that this use will be associated - /// with a single formula--the one that initially matched. Some SCEV - /// expressions cannot be expanded. This allows LSR to consider the registers - /// used by those expressions without the need to expand them later after - /// changing the formula. - bool RigidFormula; - - /// This records the widest use type for any fixup using this - /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max - /// fixup widths to be equivalent, because the narrower one may be relying on - /// the implicit truncation to truncate away bogus bits. - Type *WidestFixupType; - - /// A list of ways to build a value that can satisfy this user. After the - /// list is populated, one of these is selected heuristically and used to - /// formulate a replacement for OperandValToReplace in UserInst. - SmallVector<Formula, 12> Formulae; - - /// The set of register candidates used by all formulae in this LSRUse. - SmallPtrSet<const SCEV *, 4> Regs; - - LSRUse(KindType K, MemAccessTy AT) - : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true), RigidFormula(false), - WidestFixupType(nullptr) {} - - bool HasFormulaWithSameRegs(const Formula &F) const; - bool InsertFormula(const Formula &F); - void DeleteFormula(Formula &F); - void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); - - void print(raw_ostream &OS) const; - void dump() const; -}; - -} - /// Test whether this use as a formula which has the same registers as the given /// formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { @@ -1334,9 +1377,9 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", Offsets={"; bool NeedComma = false; - for (int64_t O : Offsets) { + for (const LSRFixup &Fixup : Fixups) { if (NeedComma) OS << ','; - OS << O; + OS << Fixup.Offset; NeedComma = true; } OS << '}'; @@ -1638,14 +1681,16 @@ class LSRInstance { Instruction *IVIncInsertPos; /// Interesting factors between use strides. - SmallSetVector<int64_t, 8> Factors; + /// + /// We explicitly use a SetVector which contains a SmallSet, instead of the + /// default, a SmallDenseSet, because we need to use the full range of + /// int64_ts, and there's currently no good way of doing that with + /// SmallDenseSet. + SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors; /// Interesting use types, to facilitate truncation reuse. SmallSetVector<Type *, 4> Types; - /// The list of operands which are to be replaced. - SmallVector<LSRFixup, 16> Fixups; - /// The list of interesting uses. SmallVector<LSRUse, 16> Uses; @@ -1678,11 +1723,6 @@ class LSRInstance { void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); - LSRFixup &getNewFixup() { - Fixups.push_back(LSRFixup()); - return Fixups.back(); - } - // Support for sharing of LSRUses between LSRFixups. typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy; UseMapTy UseMap; @@ -1752,16 +1792,16 @@ class LSRInstance { const LSRUse &LU, SCEVExpander &Rewriter) const; - Value *Expand(const LSRFixup &LF, + Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, SCEVExpander &Rewriter, SmallVectorImpl<WeakVH> &DeadInsts) const; - void RewriteForPHI(PHINode *PN, const LSRFixup &LF, + void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, SmallVectorImpl<WeakVH> &DeadInsts) const; - void Rewrite(const LSRFixup &LF, + void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, SmallVectorImpl<WeakVH> &DeadInsts) const; @@ -1780,7 +1820,7 @@ public: void dump() const; }; -} +} // end anonymous namespace /// If IV is used in a int-to-float cast inside the loop then try to eliminate /// the cast operation. @@ -2068,10 +2108,30 @@ void LSRInstance::OptimizeLoopTermCond() { SmallPtrSet<Instruction *, 4> PostIncs; + // We need a different set of heuristics for rotated and non-rotated loops. + // If a loop is rotated then the latch is also the backedge, so inserting + // post-inc expressions just before the latch is ideal. To reduce live ranges + // it also makes sense to rewrite terminating conditions to use post-inc + // expressions. + // + // If the loop is not rotated then the latch is not a backedge; the latch + // check is done in the loop head. Adding post-inc expressions before the + // latch will cause overlapping live-ranges of pre-inc and post-inc expressions + // in the loop body. In this case we do *not* want to use post-inc expressions + // in the latch check, and we want to insert post-inc expressions before + // the backedge. BasicBlock *LatchBlock = L->getLoopLatch(); SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); + if (llvm::all_of(ExitingBlocks, [&LatchBlock](const BasicBlock *BB) { + return LatchBlock != BB; + })) { + // The backedge doesn't exit the loop; treat this as a head-tested loop. + IVIncInsertPos = LatchBlock->getTerminator(); + return; + } + // Otherwise treat this as a rotated loop. for (BasicBlock *ExitingBlock : ExitingBlocks) { // Get the terminating condition for the loop if possible. If we @@ -2220,8 +2280,10 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, // TODO: Be less conservative when the type is similar and can use the same // addressing modes. if (Kind == LSRUse::Address) { - if (AccessTy != LU.AccessTy) - NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext()); + if (AccessTy.MemTy != LU.AccessTy.MemTy) { + NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(), + AccessTy.AddrSpace); + } } // Conservatively assume HasBaseReg is true for now. @@ -2241,8 +2303,6 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, LU.MinOffset = NewMinOffset; LU.MaxOffset = NewMaxOffset; LU.AccessTy = NewAccessTy; - if (NewOffset != LU.Offsets.back()) - LU.Offsets.push_back(NewOffset); return true; } @@ -2279,11 +2339,6 @@ std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr, Uses.push_back(LSRUse(Kind, AccessTy)); LSRUse &LU = Uses[LUIdx]; - // We don't need to track redundant offsets, but we don't need to go out - // of our way here to avoid them. - if (LU.Offsets.empty() || Offset != LU.Offsets.back()) - LU.Offsets.push_back(Offset); - LU.MinOffset = Offset; LU.MaxOffset = Offset; return std::make_pair(LUIdx, Offset); @@ -2500,7 +2555,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr, if (!isa<SCEVConstant>(IncExpr)) { const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand)); if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr))) - return 0; + return false; } SmallPtrSet<const SCEV*, 8> Processed; @@ -2797,9 +2852,8 @@ void LSRInstance::FinalizeChain(IVChain &Chain) { DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n"); for (const IVInc &Inc : Chain) { - DEBUG(dbgs() << " Inc: " << Inc.UserInst << "\n"); - auto UseI = std::find(Inc.UserInst->op_begin(), Inc.UserInst->op_end(), - Inc.IVOperand); + DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n"); + auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand); assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand"); IVIncSet.insert(UseI); } @@ -2932,39 +2986,34 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { for (const IVStrideUse &U : IU) { Instruction *UserInst = U.getUser(); // Skip IV users that are part of profitable IV Chains. - User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(), - U.getOperandValToReplace()); + User::op_iterator UseI = + find(UserInst->operands(), U.getOperandValToReplace()); assert(UseI != UserInst->op_end() && "cannot find IV operand"); if (IVIncSet.count(UseI)) continue; - // Record the uses. - LSRFixup &LF = getNewFixup(); - LF.UserInst = UserInst; - LF.OperandValToReplace = U.getOperandValToReplace(); - LF.PostIncLoops = U.getPostIncLoops(); - LSRUse::KindType Kind = LSRUse::Basic; MemAccessTy AccessTy; - if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { + if (isAddressUse(UserInst, U.getOperandValToReplace())) { Kind = LSRUse::Address; - AccessTy = getAccessType(LF.UserInst); + AccessTy = getAccessType(UserInst); } const SCEV *S = IU.getExpr(U); - + PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops(); + // Equality (== and !=) ICmps are special. We can rewrite (i == N) as // (N - i == 0), and this allows (N - i) to be the expression that we work // with rather than just N or i, so we can consider the register // requirements for both N and i at the same time. Limiting this code to // equality icmps is not a problem because all interesting loops use // equality icmps, thanks to IndVarSimplify. - if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst)) + if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) if (CI->isEquality()) { // Swap the operands if needed to put the OperandValToReplace on the // left, for consistency. Value *NV = CI->getOperand(1); - if (NV == LF.OperandValToReplace) { + if (NV == U.getOperandValToReplace()) { CI->setOperand(1, CI->getOperand(0)); CI->setOperand(0, NV); NV = CI->getOperand(1); @@ -2977,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // S is normalized, so normalize N before folding it into S // to keep the result normalized. N = TransformForPostIncUse(Normalize, N, CI, nullptr, - LF.PostIncLoops, SE, DT); + TmpPostIncLoops, SE, DT); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); } @@ -2990,12 +3039,20 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { Factors.insert(-1); } - // Set up the initial formula for this use. + // Get or create an LSRUse. std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy); - LF.LUIdx = P.first; - LF.Offset = P.second; - LSRUse &LU = Uses[LF.LUIdx]; + size_t LUIdx = P.first; + int64_t Offset = P.second; + LSRUse &LU = Uses[LUIdx]; + + // Record the fixup. + LSRFixup &LF = LU.getNewFixup(); + LF.UserInst = UserInst; + LF.OperandValToReplace = U.getOperandValToReplace(); + LF.PostIncLoops = TmpPostIncLoops; + LF.Offset = Offset; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || SE.getTypeSizeInBits(LU.WidestFixupType) < SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) @@ -3003,8 +3060,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { - InsertInitialFormula(S, LU, LF.LUIdx); - CountRegisters(LU.Formulae.back(), LF.LUIdx); + InsertInitialFormula(S, LU, LUIdx); + CountRegisters(LU.Formulae.back(), LUIdx); } } @@ -3109,6 +3166,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { // Don't bother if the instruction is in a BB which ends in an EHPad. if (UseBB->getTerminator()->isEHPad()) continue; + // Don't bother rewriting PHIs in catchswitch blocks. + if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator())) + continue; // Ignore uses which are part of other SCEV expressions, to avoid // analyzing them multiple times. if (SE.isSCEVable(UserInst->getType())) { @@ -3130,20 +3190,21 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { continue; } - LSRFixup &LF = getNewFixup(); - LF.UserInst = const_cast<Instruction *>(UserInst); - LF.OperandValToReplace = U; std::pair<size_t, int64_t> P = getUse( S, LSRUse::Basic, MemAccessTy()); - LF.LUIdx = P.first; - LF.Offset = P.second; - LSRUse &LU = Uses[LF.LUIdx]; + size_t LUIdx = P.first; + int64_t Offset = P.second; + LSRUse &LU = Uses[LUIdx]; + LSRFixup &LF = LU.getNewFixup(); + LF.UserInst = const_cast<Instruction *>(UserInst); + LF.OperandValToReplace = U; + LF.Offset = Offset; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); if (!LU.WidestFixupType || SE.getTypeSizeInBits(LU.WidestFixupType) < SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) LU.WidestFixupType = LF.OperandValToReplace->getType(); - InsertSupplementalFormula(US, LU, LF.LUIdx); + InsertSupplementalFormula(US, LU, LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; } @@ -3175,7 +3236,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, return nullptr; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. - if (AR->getStart()->isZero()) + if (AR->getStart()->isZero() || !AR->isAffine()) return S; const SCEV *Remainder = CollectSubexprs(AR->getStart(), @@ -3629,7 +3690,7 @@ struct WorkItem { void dump() const; }; -} +} // end anonymous namespace void WorkItem::print(raw_ostream &OS) const { OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx @@ -3872,8 +3933,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // the corresponding bad register from the Regs set. Cost CostF; Regs.clear(); - CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU, - &LoserRegs); + CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs); if (CostF.isLoser()) { // During initial formula generation, undesirable formulae are generated // by uses within other loops that have some non-trivial address mode or @@ -3906,8 +3966,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { Cost CostBest; Regs.clear(); - CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE, - DT, LU); + CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU); if (CostF < CostBest) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); @@ -4053,25 +4112,13 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; - // Update the relocs to reference the new use. - for (LSRFixup &Fixup : Fixups) { - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.BaseOffset; - // Add the new offset to LUThatHas' offset list. - if (LUThatHas->Offsets.back() != Fixup.Offset) { - LUThatHas->Offsets.push_back(Fixup.Offset); - if (Fixup.Offset > LUThatHas->MaxOffset) - LUThatHas->MaxOffset = Fixup.Offset; - if (Fixup.Offset < LUThatHas->MinOffset) - LUThatHas->MinOffset = Fixup.Offset; - } - DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; + // Transfer the fixups of LU to LUThatHas. + for (LSRFixup &Fixup : LU.Fixups) { + Fixup.Offset += F.BaseOffset; + LUThatHas->pushFixup(Fixup); + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } - + // Delete formulae from the new use which are no longer legal. bool Any = false; for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { @@ -4137,9 +4184,10 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { for (const SCEV *Reg : RegUses) { if (Taken.count(Reg)) continue; - if (!Best) + if (!Best) { Best = Reg; - else { + BestNum = RegUses.getUsedByIndices(Reg).count(); + } else { unsigned Count = RegUses.getUsedByIndices(Reg).count(); if (Count > BestNum) { Best = Reg; @@ -4229,8 +4277,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); for (const SCEV *Reg : ReqRegs) { if ((F.ScaledReg && F.ScaledReg == Reg) || - std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) != - F.BaseRegs.end()) { + is_contained(F.BaseRegs, Reg)) { --NumReqRegsToFind; if (NumReqRegsToFind == 0) break; @@ -4246,8 +4293,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, // the current best, prune the search at that point. NewCost = CurCost; NewRegs = CurRegs; - NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT, - LU); + NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU); if (NewCost < SolutionCost) { Workspace.push_back(&F); if (Workspace.size() != Uses.size()) { @@ -4313,7 +4359,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl<Instruction *> &Inputs) const { Instruction *Tentative = &*IP; - for (;;) { + while (true) { bool AllDominate = true; Instruction *BetterPos = nullptr; // Don't bother attempting to insert before a catchswitch, their basic block @@ -4430,12 +4476,12 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"). -Value *LSRInstance::Expand(const LSRFixup &LF, +Value *LSRInstance::Expand(const LSRUse &LU, + const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, SCEVExpander &Rewriter, SmallVectorImpl<WeakVH> &DeadInsts) const { - const LSRUse &LU = Uses[LF.LUIdx]; if (LU.RigidFormula) return LF.OperandValToReplace; @@ -4617,6 +4663,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. void LSRInstance::RewriteForPHI(PHINode *PN, + const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, @@ -4631,7 +4678,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN, // is the canonical backedge for this loop, which complicates post-inc // users. if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(BB->getTerminator())) { + !isa<IndirectBrInst>(BB->getTerminator()) && + !isa<CatchSwitchInst>(BB->getTerminator())) { BasicBlock *Parent = PN->getParent(); Loop *PNLoop = LI.getLoopFor(Parent); if (!PNLoop || Parent != PNLoop->getHeader()) { @@ -4670,7 +4718,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { - Value *FullV = Expand(LF, F, BB->getTerminator()->getIterator(), + Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(), Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. @@ -4691,17 +4739,18 @@ void LSRInstance::RewriteForPHI(PHINode *PN, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. -void LSRInstance::Rewrite(const LSRFixup &LF, +void LSRInstance::Rewrite(const LSRUse &LU, + const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, SmallVectorImpl<WeakVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { - RewriteForPHI(PN, LF, F, Rewriter, DeadInsts); + RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts); } else { Value *FullV = - Expand(LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts); + Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -4717,7 +4766,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF, // its new value may happen to be equal to LF.OperandValToReplace, in // which case doing replaceUsesOfWith leads to replacing both operands // with the same value. TODO: Reorganize this. - if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero) + if (LU.Kind == LSRUse::ICmpZero) LF.UserInst->setOperand(0, FullV); else LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV); @@ -4750,11 +4799,11 @@ void LSRInstance::ImplementSolution( } // Expand the new value definitions and update the users. - for (const LSRFixup &Fixup : Fixups) { - Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts); - - Changed = true; - } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) + for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { + Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts); + Changed = true; + } for (const IVChain &Chain : IVChainVec) { GenerateIVChain(Chain, Rewriter, DeadInsts); @@ -4898,11 +4947,12 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const { void LSRInstance::print_fixups(raw_ostream &OS) const { OS << "LSR is examining the following fixup sites:\n"; - for (const LSRFixup &LF : Fixups) { - dbgs() << " "; - LF.print(OS); - OS << '\n'; - } + for (const LSRUse &LU : Uses) + for (const LSRFixup &LF : LU.Fixups) { + dbgs() << " "; + LF.print(OS); + OS << '\n'; + } } void LSRInstance::print_uses(raw_ostream &OS) const { @@ -4935,6 +4985,7 @@ namespace { class LoopStrengthReduce : public LoopPass { public: static char ID; // Pass ID, replacement for typeid + LoopStrengthReduce(); private: @@ -4942,24 +4993,7 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override; }; -} - -char LoopStrengthReduce::ID = 0; -INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", - "Loop Strength Reduction", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", - "Loop Strength Reduction", false, false) - - -Pass *llvm::createLoopStrengthReducePass() { - return new LoopStrengthReduce(); -} +} // end anonymous namespace LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) { initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); @@ -4985,16 +5019,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfoWrapperPass>(); } -bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { - if (skipLoop(L)) - return false; - - auto &IU = getAnalysis<IVUsersWrapperPass>().getIU(); - auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI( - *L->getHeader()->getParent()); +static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, + DominatorTree &DT, LoopInfo &LI, + const TargetTransformInfo &TTI) { bool Changed = false; // Run the main LSR transformation. @@ -5005,15 +5032,11 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector<WeakVH, 16> DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - SCEVExpander Rewriter(getAnalysis<ScalarEvolutionWrapperPass>().getSE(), DL, - "lsr"); + SCEVExpander Rewriter(SE, DL, "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif - unsigned numFolded = Rewriter.replaceCongruentIVs( - L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts, - &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( - *L->getHeader()->getParent())); + unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI); if (numFolded) { Changed = true; DeleteTriviallyDeadInstructions(DeadInsts); @@ -5022,3 +5045,40 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { } return Changed; } + +bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { + if (skipLoop(L)) + return false; + + auto &IU = getAnalysis<IVUsersWrapperPass>().getIU(); + auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI( + *L->getHeader()->getParent()); + return ReduceLoopStrength(L, IU, SE, DT, LI, TTI); +} + +PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE, + AR.DT, AR.LI, AR.TTI)) + return PreservedAnalyses::all(); + + return getLoopPassPreservedAnalyses(); +} + +char LoopStrengthReduce::ID = 0; +INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false) + +Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); } |