Diffstat (limited to 'contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h')
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h  126
1 file changed, 94 insertions(+), 32 deletions(-)
diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 69951af..7efdbcc 100644
--- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -60,8 +60,9 @@ private:
return Cost;
}
- /// Estimate the cost overhead of SK_Alternate shuffle.
- unsigned getAltShuffleOverhead(Type *Ty) {
+ /// Estimate a cost of shuffle as a sequence of extract and insert
+ /// operations.
+ unsigned getPermuteShuffleOverhead(Type *Ty) {
assert(Ty->isVectorTy() && "Can only shuffle vectors");
unsigned Cost = 0;
// Shuffle cost is equal to the cost of extracting element from its argument
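Note on the renamed helper: as the (truncated) comment above says, the model charges one extractelement from the source plus one insertelement into the result for every lane. A minimal standalone sketch of that accounting, using illustrative per-lane costs rather than the real TTI plumbing, might look like:

  // Illustrative only: per-lane extract + insert accounting for a permute-style
  // shuffle of a vector with NumElts lanes. CostPerExtract/CostPerInsert stand
  // in for whatever the target reports for extractelement/insertelement.
  unsigned permuteShuffleOverheadSketch(unsigned NumElts, unsigned CostPerExtract,
                                        unsigned CostPerInsert) {
    unsigned Cost = 0;
    for (unsigned I = 0; I != NumElts; ++I)
      Cost += CostPerExtract + CostPerInsert; // one extract and one insert per lane
    return Cost;
  }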
@@ -97,18 +98,13 @@ protected:
using TargetTransformInfoImplBase::DL;
public:
- // Provide value semantics. MSVC requires that we spell all of these out.
- BasicTTIImplBase(const BasicTTIImplBase &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)) {}
- BasicTTIImplBase(BasicTTIImplBase &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
-
/// \name Scalar TTI Implementations
/// @{
- bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth, unsigned AddressSpace,
unsigned Alignment, bool *Fast) const {
- MVT M = MVT::getIntegerVT(BitWidth);
- return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast);
+ EVT E = EVT::getIntegerVT(Context, BitWidth);
+ return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
}
bool hasBranchDivergence() { return false; }
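The switch from MVT to EVT above matters because EVT::getIntegerVT can describe integer widths that have no simple machine type (for example i3 or i129), which is why the query now needs an LLVMContext. A hedged caller-side sketch, with an assumed helper name and the usual TargetLowering/ValueTypes headers available:

  // Assumed helper (not part of the patch): ask the lowering layer whether an
  // unaligned integer access of an arbitrary bit width is both legal and fast.
  bool canUseMisalignedAccess(const TargetLoweringBase &TLI, LLVMContext &Ctx,
                              unsigned BitWidth, unsigned AddrSpace,
                              unsigned Alignment) {
    bool Fast = false;
    EVT VT = EVT::getIntegerVT(Ctx, BitWidth);
    return TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, &Fast) &&
           Fast;
  }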
@@ -144,6 +140,10 @@ public:
return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
}
+ bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) {
+ return getTLI()->isFoldableMemAccessOffset(I, Offset);
+ }
+
bool isTruncateFree(Type *Ty1, Type *Ty2) {
return getTLI()->isTruncateFree(Ty1, Ty2);
}
@@ -279,8 +279,17 @@ public:
}
// Enable runtime and partial unrolling up to the specified size.
- UP.Partial = UP.Runtime = true;
- UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
+ // Enable using trip count upper bound to unroll loops.
+ UP.Partial = UP.Runtime = UP.UpperBound = true;
+ UP.PartialThreshold = MaxOps;
+
+ // Avoid unrolling when optimizing for size.
+ UP.OptSizeThreshold = 0;
+ UP.PartialOptSizeThreshold = 0;
+
+ // Set the number of instructions optimized away when a "back edge"
+ // becomes a "fall through" to its default value of 2.
+ UP.BEInsns = 2;
}
/// @}
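To make the role of these knobs concrete: the unroller compares an estimated unrolled size against PartialThreshold, and BEInsns models the compare-and-branch instructions that disappear when a back edge turns into fall-through. A rough, self-contained sketch of that sizing decision (an approximation, not a quote of the pass's code):

  // Approximate sizing check (illustrative): unrolling by Count keeps one copy
  // of the back-edge instructions and Count copies of everything else.
  bool fitsPartialThreshold(unsigned LoopSize, unsigned Count,
                            unsigned BEInsns, unsigned PartialThreshold) {
    unsigned Body = LoopSize > BEInsns ? LoopSize - BEInsns : 1;
    unsigned UnrolledSize = Body * Count + BEInsns;
    return UnrolledSize <= PartialThreshold;
  }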
@@ -299,7 +308,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
// Check if any of the operands are vector operands.
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -343,8 +353,9 @@ public:
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Alternate) {
- return getAltShuffleOverhead(Tp);
+ if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc ||
+ Kind == TTI::SK_PermuteSingleSrc) {
+ return getPermuteShuffleOverhead(Tp);
}
return 1;
}
@@ -915,20 +926,78 @@ public:
return LT.first;
}
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex) { return 0; }
+ unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
+ const SCEV *) {
+ return 0;
+ }
unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
assert(Ty->isVectorTy() && "Expect a vector type");
+ Type *ScalarTy = Ty->getVectorElementType();
unsigned NumVecElts = Ty->getVectorNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
- unsigned ArithCost =
- NumReduxLevels *
- static_cast<T *>(this)->getArithmeticInstrCost(Opcode, Ty);
- // Assume the pairwise shuffles add a cost.
- unsigned ShuffleCost =
- NumReduxLevels * (IsPairwise + 1) *
- static_cast<T *>(this)
- ->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
+ // Try to calculate arithmetic and shuffle op costs for reduction operations.
+ // We're assuming that reduction operations are performed the following way:
+ // 1. Non-pairwise reduction
+ // %val1 = shufflevector<n x t> %val, <n x t> %undef,
+ // <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
+ // \----------------v-------------/ \----------v------------/
+ // n/2 elements n/2 elements
+ // %red1 = op <n x t> %val, <n x t> %val1
+ // After this operation we have a vector %red1 in which only the first
+ // n/2 elements are meaningful; the second n/2 elements are undefined and
+ // can be dropped. All subsequent operations effectively work on a vector of
+ // length n/2, even though the real vector length is still n.
+ // %val2 = shufflevector<n x t> %red1, <n x t> %undef,
+ // <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
+ // \----------------v-------------/ \----------v------------/
+ // n/4 elements 3*n/4 elements
+ // %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of
+ // length n/2, the resulting vector has length n/4 etc.
+ // 2. Pairwise reduction:
+ // Everything is the same except for an additional shuffle operation which
+ // is used to produce operands for pairwise kind of reductions.
+ // %val1 = shufflevector<n x t> %val, <n x t> %undef,
+ // <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
+ // \-------------v----------/ \----------v------------/
+ // n/2 elements n/2 elements
+ // %val2 = shufflevector<n x t> %val, <n x t> %undef,
+ // <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
+ // \-------------v----------/ \----------v------------/
+ // n/2 elements n/2 elements
+ // %red1 = op <n x t> %val1, <n x t> %val2
+ // Again, the operation is performed on <n x t> vector, but the resulting
+ // vector %red1 is <n/2 x t> vector.
+ //
+ // The cost model should take into account that the actual length of the
+ // vector is reduced on each iteration.
+ unsigned ArithCost = 0;
+ unsigned ShuffleCost = 0;
+ auto *ConcreteTTI = static_cast<T *>(this);
+ std::pair<unsigned, MVT> LT =
+ ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
+ unsigned LongVectorCount = 0;
+ unsigned MVTLen =
+ LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
+ while (NumVecElts > MVTLen) {
+ NumVecElts /= 2;
+ // Assume the pairwise shuffles add a cost.
+ ShuffleCost += (IsPairwise + 1) *
+ ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+ NumVecElts, Ty);
+ ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
+ Ty = VectorType::get(ScalarTy, NumVecElts);
+ ++LongVectorCount;
+ }
+ // The minimal length of the vector is limited by the real length of vector
+ // operations performed on the current platform. That's why several final
+ // reduction operations are performed on vectors with the same
+ // architecture-dependent length.
+ ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
+ ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+ NumVecElts, Ty);
+ ArithCost += (NumReduxLevels - LongVectorCount) *
+ ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
}
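A worked example may help here. Consider a non-pairwise integer add reduction of <8 x i32> on a target whose widest legal vector is <4 x i32>: NumReduxLevels is log2(8) = 3 and MVTLen is 4, so one halving step runs at the illegal 8-element width and the remaining two levels run at the legal width. A self-contained sketch with assumed unit costs (every shuffle and add costed at 1; real targets will differ):

  // Mirrors the halving loop above with made-up unit costs; the real
  // implementation queries getShuffleCost/getArithmeticInstrCost instead.
  unsigned reductionCostSketch() {
    unsigned NumVecElts = 8, MVTLen = 4, NumReduxLevels = 3; // log2(8)
    unsigned ShuffleCost = 0, ArithCost = 0, LongVectorCount = 0;
    while (NumVecElts > MVTLen) { // levels performed on wider-than-legal vectors
      NumVecElts /= 2;
      ShuffleCost += 1;           // extract-subvector style shuffle, assumed cost 1
      ArithCost += 1;             // vector add at the current width, assumed cost 1
      ++LongVectorCount;
    }
    // Remaining levels all run at the architecture-dependent legal width.
    ShuffleCost += (NumReduxLevels - LongVectorCount) * 1;
    ArithCost += (NumReduxLevels - LongVectorCount) * 1;
    return ShuffleCost + ArithCost; // plus the final scalar extract in the real code
  }

For this example the sketch yields ShuffleCost = 1 + 2 = 3 and ArithCost = 1 + 2 = 3, for a total of 6 before the scalarization overhead of extracting the final scalar.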
@@ -951,13 +1020,6 @@ class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
public:
explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
-
- // Provide value semantics. MSVC requires that we spell all of these out.
- BasicTTIImpl(const BasicTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- BasicTTIImpl(BasicTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
};
}