Diffstat (limited to 'lib/Target/ARM/ARMTargetTransformInfo.cpp')
-rw-r--r-- | lib/Target/ARM/ARMTargetTransformInfo.cpp | 147
1 files changed, 88 insertions, 59 deletions
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 6bbb38f..a2ace62 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -14,17 +14,18 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "armtti"
 #include "ARM.h"
 #include "ARMTargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "armtti"
+
 // Declare the pass initialization routine locally as target-specific passes
-// don't havve a target-wide initialization entry point, and so we rely on the
+// don't have a target-wide initialization entry point, and so we rely on the
 // pass constructor initialization.
 namespace llvm {
 void initializeARMTTIPass(PassRegistry &);
@@ -32,7 +33,7 @@ void initializeARMTTIPass(PassRegistry &);
 
 namespace {
 
-class ARMTTI : public ImmutablePass, public TargetTransformInfo {
+class ARMTTI final : public ImmutablePass, public TargetTransformInfo {
   const ARMBaseTargetMachine *TM;
   const ARMSubtarget *ST;
   const ARMTargetLowering *TLI;
@@ -42,7 +43,7 @@ class ARMTTI : public ImmutablePass, public TargetTransformInfo {
   unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
 
 public:
-  ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+  ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
     llvm_unreachable("This pass cannot be directly constructed");
   }
 
@@ -52,15 +53,11 @@ public:
     initializeARMTTIPass(*PassRegistry::getPassRegistry());
   }
 
-  virtual void initializePass() {
+  void initializePass() override {
     pushTTIStack(this);
   }
 
-  virtual void finalizePass() {
-    popTTIStack();
-  }
-
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     TargetTransformInfo::getAnalysisUsage(AU);
   }
 
@@ -68,7 +65,7 @@ public:
   static char ID;
 
   /// Provide necessary pointer adjustments for the two base classes.
-  virtual void *getAdjustedAnalysisPointer(const void *ID) {
+  void *getAdjustedAnalysisPointer(const void *ID) override {
     if (ID == &TargetTransformInfo::ID)
       return (TargetTransformInfo*)this;
     return this;
@@ -76,8 +73,8 @@ public:
 
   /// \name Scalar TTI Implementations
   /// @{
-
-  virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
+  using TargetTransformInfo::getIntImmCost;
+  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
 
   /// @}
 
@@ -85,7 +82,7 @@ public:
   /// \name Vector TTI Implementations
   /// @{
 
-  unsigned getNumberOfRegisters(bool Vector) const {
+  unsigned getNumberOfRegisters(bool Vector) const override {
     if (Vector) {
       if (ST->hasNEON())
         return 16;
@@ -94,10 +91,10 @@ public:
 
     if (ST->isThumb1Only())
       return 8;
-    return 16;
+    return 13;
   }
 
-  unsigned getRegisterBitWidth(bool Vector) const {
+  unsigned getRegisterBitWidth(bool Vector) const override {
     if (Vector) {
       if (ST->hasNEON())
         return 128;
@@ -107,7 +104,7 @@
     return 32;
   }
 
-  unsigned getMaximumUnrollFactor() const {
+  unsigned getMaximumUnrollFactor() const override {
     // These are out of order CPUs:
     if (ST->isCortexA15() || ST->isSwift())
       return 2;
@@ -115,23 +112,27 @@ public:
   }
 
   unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
-                          int Index, Type *SubTp) const;
+                          int Index, Type *SubTp) const override;
 
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
-                            Type *Src) const;
+                            Type *Src) const override;
 
-  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const;
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                              Type *CondTy) const override;
 
-  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                              unsigned Index) const override;
 
-  unsigned getAddressComputationCost(Type *Val, bool IsComplex) const;
+  unsigned getAddressComputationCost(Type *Val,
+                                     bool IsComplex) const override;
 
-  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                  OperandValueKind Op1Info = OK_AnyValue,
-                                  OperandValueKind Op2Info = OK_AnyValue) const;
+  unsigned
+  getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                         OperandValueKind Op1Info = OK_AnyValue,
+                         OperandValueKind Op2Info = OK_AnyValue) const override;
 
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                           unsigned AddressSpace) const;
+                           unsigned AddressSpace) const override;
 
   /// @}
 };
@@ -162,25 +163,25 @@ unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
         (ARM_AM::getSOImmVal(~ZImmVal) != -1))
       return 1;
     return ST->hasV6T2Ops() ? 2 : 3;
-  } else if (ST->isThumb2()) {
+  }
+  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
-  } else /*Thumb1*/ {
-    if (SImmVal >= 0 && SImmVal < 256)
-      return 1;
-    if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
-      return 2;
-    // Load from constantpool.
-    return 3;
   }
-  return 2;
+  // Thumb1.
+  if (SImmVal >= 0 && SImmVal < 256)
+    return 1;
+  if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+    return 2;
+  // Load from constantpool.
+  return 3;
 }
 
 unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
-                                  Type *Src) const {
+                                  Type *Src) const {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
@@ -442,34 +443,62 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
 
 unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                 Type *SubTp) const {
-  // We only handle costs of reverse shuffles for now.
-  if (Kind != SK_Reverse)
+  // We only handle costs of reverse and alternate shuffles for now.
+  if (Kind != SK_Reverse && Kind != SK_Alternate)
     return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 
-  static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
-    // Reverse shuffle cost one instruction if we are shuffling within a double
-    // word (vrev) or two if we shuffle a quad word (vrev, vext).
-    { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
-    { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
-    { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
-    { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
-
-    { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
-    { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
-    { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
-    { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
-  };
+  if (Kind == SK_Reverse) {
+    static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
+        // Reverse shuffle cost one instruction if we are shuffling within a
+        // double word (vrev) or two if we shuffle a quad word (vrev, vext).
+        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
 
-  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
 
-  int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
-  if (Idx == -1)
-    return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+    int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+    if (Idx == -1)
+      return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 
-  return LT.first * NEONShuffleTbl[Idx].Cost;
+    return LT.first * NEONShuffleTbl[Idx].Cost;
+  }
+  if (Kind == SK_Alternate) {
+    static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
+        // Alt shuffle cost table for ARM. Cost is the number of instructions
+        // required to create the shuffled vector.
+
+        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+
+        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
+
+        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
+
+        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
+
+    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+    int Idx =
+        CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+    if (Idx == -1)
+      return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+    return LT.first * NEONAltShuffleTbl[Idx].Cost;
+  }
+  return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 }
 
-unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                        OperandValueKind Op1Info,
                                         OperandValueKind Op2Info) const {
 
   int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
@@ -533,7 +562,7 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
   // creates a sequence of shift, and, or instructions to construct values.
   // These sequences are recognized by the ISel and have zero-cost. Not so for
   // the vectorized code. Because we have support for v2i64 but not i64 those
-  // sequences look particularily beneficial to vectorize.
+  // sequences look particularly beneficial to vectorize.
   // To work around this we increase the cost of v2i64 operations to make them
   // seem less beneficial.
   if (LT.second == MVT::v2i64 &&
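Note (illustration only, not part of the commit): the new NEONAltShuffleTbl above prices SK_Alternate shuffles. The standalone C++ sketch below uses no LLVM APIs, and the helper names reverseMask and alternateMask are invented for this example. It builds the element-index masks that the two shuffle kinds handled by this patch describe: a reverse shuffle flips a single input vector, while an alternate shuffle takes even lanes from the first input and odd lanes from the second (for four lanes the mask is <0, 5, 2, 7>).

// Illustrative only: builds the masks that SK_Reverse and SK_Alternate
// describe for two N-lane inputs, whose lanes are numbered 0..N-1
// (first input) and N..2N-1 (second input).
#include <cstddef>
#include <iostream>
#include <vector>

// Reverse: <N-1, N-2, ..., 0>; every lane comes from the first input.
std::vector<int> reverseMask(std::size_t N) {
  std::vector<int> M(N);
  for (std::size_t i = 0; i < N; ++i)
    M[i] = static_cast<int>(N - 1 - i);
  return M;
}

// Alternate: <0, N+1, 2, N+3, ...>; even lanes from the first input,
// odd lanes from the second.
std::vector<int> alternateMask(std::size_t N) {
  std::vector<int> M(N);
  for (std::size_t i = 0; i < N; ++i)
    M[i] = static_cast<int>(i % 2 == 0 ? i : N + i);
  return M;
}

int main() {
  for (int Idx : reverseMask(4))   std::cout << Idx << ' '; // 3 2 1 0
  std::cout << '\n';
  for (int Idx : alternateMask(4)) std::cout << Idx << ' '; // 0 5 2 7
  std::cout << '\n';
}

Under that reading, the small entries in the alternate table correspond to types NEON can permute in one or two instructions, and the much larger v8i16 and v16i8 entries (16 and 32) are consistent with assembling the result a couple of operations per lane rather than with a single permute.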