Diffstat (limited to 'contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h')
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h  126
1 file changed, 94 insertions, 32 deletions
diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 69951af..7efdbcc 100644
--- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -60,8 +60,9 @@ private:
     return Cost;
   }

-  /// Estimate the cost overhead of SK_Alternate shuffle.
-  unsigned getAltShuffleOverhead(Type *Ty) {
+  /// Estimate a cost of shuffle as a sequence of extract and insert
+  /// operations.
+  unsigned getPermuteShuffleOverhead(Type *Ty) {
     assert(Ty->isVectorTy() && "Can only shuffle vectors");
     unsigned Cost = 0;
     // Shuffle cost is equal to the cost of extracting element from its argument
@@ -97,18 +98,13 @@ protected:
   using TargetTransformInfoImplBase::DL;

 public:
-  // Provide value semantics. MSVC requires that we spell all of these out.
-  BasicTTIImplBase(const BasicTTIImplBase &Arg)
-      : BaseT(static_cast<const BaseT &>(Arg)) {}
-  BasicTTIImplBase(BasicTTIImplBase &&Arg)
-      : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
-
   /// \name Scalar TTI Implementations
   /// @{

-  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
+                                      unsigned BitWidth, unsigned AddressSpace,
                                       unsigned Alignment, bool *Fast) const {
-    MVT M = MVT::getIntegerVT(BitWidth);
-    return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast);
+    EVT E = EVT::getIntegerVT(Context, BitWidth);
+    return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
   }

   bool hasBranchDivergence() { return false; }
@@ -144,6 +140,10 @@ public:
     return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
   }

+  bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) {
+    return getTLI()->isFoldableMemAccessOffset(I, Offset);
+  }
+
   bool isTruncateFree(Type *Ty1, Type *Ty2) {
     return getTLI()->isTruncateFree(Ty1, Ty2);
   }
@@ -279,8 +279,17 @@ public:
     }

     // Enable runtime and partial unrolling up to the specified size.
-    UP.Partial = UP.Runtime = true;
-    UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
+    // Enable using trip count upper bound to unroll loops.
+    UP.Partial = UP.Runtime = UP.UpperBound = true;
+    UP.PartialThreshold = MaxOps;
+
+    // Avoid unrolling when optimizing for size.
+    UP.OptSizeThreshold = 0;
+    UP.PartialOptSizeThreshold = 0;
+
+    // Set number of instructions optimized when "back edge"
+    // becomes "fall through" to default value of 2.
+    UP.BEInsns = 2;
   }

   /// @}
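The new BEInsns field matters because of how the unroller estimates code growth: all but one back edge becomes fall-through, so each cloned iteration is credited BEInsns instructions. A standalone sketch of that arithmetic (plain C++; estimateUnrolledSize is a hypothetical stand-in for the size estimate inside LoopUnrollPass, not code from this header):

    #include <cstdio>

    // Hypothetical stand-in for the unroller's size estimate: unrolling by
    // Count keeps one back edge and turns the rest into fall-through, so
    // each cloned body is charged LoopSize - BEInsns instructions.
    unsigned estimateUnrolledSize(unsigned LoopSize, unsigned Count,
                                  unsigned BEInsns) {
      return (LoopSize - BEInsns) * Count + BEInsns;
    }

    int main() {
      // A 10-instruction body unrolled 4x with BEInsns = 2 (the default set
      // above) is estimated at (10 - 2) * 4 + 2 = 34 instructions, not 40.
      printf("%u\n", estimateUnrolledSize(10, 4, 2));
      return 0;
    }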
@@ -299,7 +308,8 @@ public:
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
     // Check if any of the operands are vector operands.
     const TargetLoweringBase *TLI = getTLI();
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -343,8 +353,9 @@ public:

   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                           Type *SubTp) {
-    if (Kind == TTI::SK_Alternate) {
-      return getAltShuffleOverhead(Tp);
+    if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc ||
+        Kind == TTI::SK_PermuteSingleSrc) {
+      return getPermuteShuffleOverhead(Tp);
     }
     return 1;
   }
@@ -915,20 +926,78 @@ public:
     return LT.first;
   }

-  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) { return 0; }
+  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
+                                     const SCEV *) {
+    return 0;
+  }

   unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
     assert(Ty->isVectorTy() && "Expect a vector type");
+    Type *ScalarTy = Ty->getVectorElementType();
     unsigned NumVecElts = Ty->getVectorNumElements();
     unsigned NumReduxLevels = Log2_32(NumVecElts);
-    unsigned ArithCost =
-        NumReduxLevels *
-        static_cast<T *>(this)->getArithmeticInstrCost(Opcode, Ty);
-    // Assume the pairwise shuffles add a cost.
-    unsigned ShuffleCost =
-        NumReduxLevels * (IsPairwise + 1) *
-        static_cast<T *>(this)
-            ->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
+    // Try to calculate arithmetic and shuffle op costs for reduction
+    // operations. We assume that reduction operations are performed the
+    // following way:
+    // 1. Non-pairwise reduction
+    // %val1 = shufflevector<n x t> %val, <n x t> %undef,
+    // <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
+    //            \----------------v-------------/  \----------v------------/
+    //                            n/2 elements               n/2 elements
+    // %red1 = op <n x t> %val, <n x t> val1
+    // After this operation only the first n/2 elements of %red1 are
+    // meaningful; the second n/2 elements are undefined and can be dropped.
+    // All subsequent operations are actually working with a vector of
+    // length n/2, not n, though the real vector length is still n.
+    // %val2 = shufflevector<n x t> %red1, <n x t> %undef,
+    // <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
+    //            \----------------v-------------/  \----------v------------/
+    //                            n/4 elements               3*n/4 elements
+    // %red2 = op <n x t> %red1, <n x t> val2  - working with the vector of
+    // length n/2, the resulting vector has length n/4 etc.
+    // 2. Pairwise reduction:
+    // Everything is the same except for an additional shuffle operation which
+    // is used to produce operands for pairwise kind of reductions.
+    // %val1 = shufflevector<n x t> %val, <n x t> %undef,
+    // <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
+    //            \-------------v----------/  \----------v------------/
+    //                   n/2 elements               n/2 elements
+    // %val2 = shufflevector<n x t> %val, <n x t> %undef,
+    // <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
+    //            \-------------v----------/  \----------v------------/
+    //                   n/2 elements               n/2 elements
+    // %red1 = op <n x t> %val1, <n x t> val2
+    // Again, the operation is performed on an <n x t> vector, but the
+    // resulting vector %red1 is an <n/2 x t> vector.
+    //
+    // The cost model should take into account that the actual length of the
+    // vector is reduced on each iteration.
+    unsigned ArithCost = 0;
+    unsigned ShuffleCost = 0;
+    auto *ConcreteTTI = static_cast<T *>(this);
+    std::pair<unsigned, MVT> LT =
+        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
+    unsigned LongVectorCount = 0;
+    unsigned MVTLen =
+        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
+    while (NumVecElts > MVTLen) {
+      NumVecElts /= 2;
+      // Assume the pairwise shuffles add a cost.
+      ShuffleCost += (IsPairwise + 1) *
+                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+                                                 NumVecElts, Ty);
+      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
+      Ty = VectorType::get(ScalarTy, NumVecElts);
+      ++LongVectorCount;
+    }
+    // The minimal length of the vector is limited by the real length of the
+    // vector operations performed on the current platform. That's why several
+    // final reduction operations are performed on vectors with the same
+    // architecture-dependent length.
+    ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
+                   ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+                                               NumVecElts, Ty);
+    ArithCost += (NumReduxLevels - LongVectorCount) *
+                 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
     return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
   }
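To see what the rewritten getReductionCost computes, here is the same arithmetic with the TTI plumbing stripped out (plain C++; unit costs of 1 per shuffle and per arithmetic op are assumed where the real code queries getShuffleCost and getArithmeticInstrCost):

    #include <cstdio>

    // Standalone model of the new reduction cost loop: levels above the
    // widest legal vector (MVTLen elements) halve the element count and are
    // priced at the shrinking width; the remaining Log2(MVTLen) levels run
    // at the fixed, architecture-dependent width.
    unsigned reductionCost(unsigned NumVecElts, unsigned MVTLen,
                           bool IsPairwise) {
      unsigned NumReduxLevels = 0;
      for (unsigned N = NumVecElts; N > 1; N /= 2)
        ++NumReduxLevels; // Log2_32(NumVecElts) for powers of two
      unsigned Arith = 0, Shuffle = 0, LongVectorCount = 0;
      while (NumVecElts > MVTLen) {
        NumVecElts /= 2;
        Shuffle += (IsPairwise + 1) * 1; // pairwise needs an extra shuffle
        Arith += 1;
        ++LongVectorCount;
      }
      Shuffle += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) * 1;
      Arith += (NumReduxLevels - LongVectorCount) * 1;
      return Shuffle + Arith;
    }

    int main() {
      // <16 x t> reduced on a target whose widest legal vector holds 4
      // elements: two halving levels above the legal width, two at it.
      printf("%u\n", reductionCost(16, 4, /*IsPairwise=*/false));
      return 0;
    }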
@@ -951,13 +1020,6 @@ class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {

 public:
   explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
-
-  // Provide value semantics. MSVC requires that we spell all of these out.
-  BasicTTIImpl(const BasicTTIImpl &Arg)
-      : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
-  BasicTTIImpl(BasicTTIImpl &&Arg)
-      : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
-        TLI(std::move(Arg.TLI)) {}
 };

 }
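For context on the renamed helper: getPermuteShuffleOverhead keeps the old SK_Alternate pricing (one extract plus one insert per element) and now also serves SK_PermuteSingleSrc and SK_PermuteTwoSrc. A self-contained model of that pricing (plain C++; the unit extract/insert costs are stand-ins for the getVectorInstrCost queries the real function makes):

    #include <cstdio>

    // Model of the base permute-shuffle overhead: with no target shuffle
    // information, each result lane is moved through a scalar register,
    // costing one extractelement plus one insertelement.
    unsigned permuteShuffleOverhead(unsigned NumElts) {
      const unsigned ExtractCost = 1; // stand-in for getVectorInstrCost(ExtractElement, ...)
      const unsigned InsertCost = 1;  // stand-in for getVectorInstrCost(InsertElement, ...)
      unsigned Cost = 0;
      for (unsigned i = 0; i != NumElts; ++i)
        Cost += ExtractCost + InsertCost;
      return Cost;
    }

    int main() {
      // With unit costs, an 8-lane permute is estimated at 16 scalar ops;
      // targets with real shuffle instructions override this with cheaper
      // numbers.
      printf("%u\n", permuteShuffleOverhead(8));
      return 0;
    }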