diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 254 |
1 files changed, 204 insertions, 50 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 3fe61aa..0e3cb7d 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -23,15 +23,22 @@ #include "SIISelLowering.h" #include "SIFrameLowering.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <cstdint> +#include <memory> +#include <utility> #define GET_SUBTARGETINFO_HEADER #include "AMDGPUGenSubtargetInfo.inc" namespace llvm { -class SIMachineFunctionInfo; class StringRef; class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { @@ -50,9 +57,13 @@ public: ISAVersion0_0_0, ISAVersion7_0_0, ISAVersion7_0_1, + ISAVersion7_0_2, ISAVersion8_0_0, ISAVersion8_0_1, - ISAVersion8_0_3 + ISAVersion8_0_2, + ISAVersion8_0_3, + ISAVersion8_0_4, + ISAVersion8_1_0, }; protected: @@ -70,10 +81,12 @@ protected: bool HalfRate64Ops; // Dynamially set bits that enable features. + bool FP16Denormals; bool FP32Denormals; bool FP64Denormals; bool FPExceptions; bool FlatForGlobal; + bool UnalignedScratchAccess; bool UnalignedBufferAccess; bool EnableXNACK; bool DebuggerInsertNops; @@ -97,40 +110,60 @@ protected: bool SGPRInitBug; bool HasSMemRealTime; bool Has16BitInsts; + bool HasMovrel; + bool HasVGPRIndexMode; + bool HasScalarStores; + bool HasInv2PiInlineImm; bool FlatAddressSpace; bool R600ALUInst; bool CaymanISA; bool CFALUBug; bool HasVertexCache; short TexVTXClauseSize; + bool ScalarizeGlobal; // Dummy feature to use for assembler in tablegen. bool FeatureDisable; InstrItineraryData InstrItins; + SelectionDAGTargetInfo TSInfo; public: AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM); - virtual ~AMDGPUSubtarget(); + ~AMDGPUSubtarget() override; + AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS); - const AMDGPUInstrInfo *getInstrInfo() const override; - const AMDGPUFrameLowering *getFrameLowering() const override; - const AMDGPUTargetLowering *getTargetLowering() const override; - const AMDGPURegisterInfo *getRegisterInfo() const override; + const AMDGPUInstrInfo *getInstrInfo() const override = 0; + const AMDGPUFrameLowering *getFrameLowering() const override = 0; + const AMDGPUTargetLowering *getTargetLowering() const override = 0; + const AMDGPURegisterInfo *getRegisterInfo() const override = 0; const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; } + // Nothing implemented, just prevent crashes on use. + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool isAmdHsaOS() const { return TargetTriple.getOS() == Triple::AMDHSA; } + bool isMesa3DOS() const { + return TargetTriple.getOS() == Triple::Mesa3D; + } + + bool isOpenCLEnv() const { + return TargetTriple.getEnvironment() == Triple::OpenCL; + } + Generation getGeneration() const { return Gen; } @@ -151,6 +184,10 @@ public: return MaxPrivateElementSize; } + bool has16BitInsts() const { + return Has16BitInsts; + } + bool hasHWFP64() const { return FP64; } @@ -230,6 +267,10 @@ public: return DumpCode; } + bool enableIEEEBit(const MachineFunction &MF) const { + return AMDGPU::isCompute(MF.getFunction()->getCallingConv()); + } + /// Return the amount of LDS that can be used that will not restrict the /// occupancy lower than WaveCount. unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const; @@ -238,6 +279,9 @@ public: /// the given LDS memory size is the only constraint. unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const; + bool hasFP16Denormals() const { + return FP16Denormals; + } bool hasFP32Denormals() const { return FP32Denormals; @@ -259,22 +303,43 @@ public: return UnalignedBufferAccess; } + bool hasUnalignedScratchAccess() const { + return UnalignedScratchAccess; + } + bool isXNACKEnabled() const { return EnableXNACK; } - unsigned getMaxWavesPerCU() const { - if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) - return 10; + bool isMesaKernel(const MachineFunction &MF) const { + return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); + } - // FIXME: Not sure what this is for other subtagets. - return 8; + // Covers VS/PS/CS graphics shaders + bool isMesaGfxShader(const MachineFunction &MF) const { + return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv()); + } + + bool isAmdCodeObjectV2(const MachineFunction &MF) const { + return isAmdHsaOS() || isMesaKernel(MF); } /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. - unsigned getExplicitKernelArgOffset() const { - return isAmdHsaOS() ? 0 : 36; + unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { + return isAmdCodeObjectV2(MF) ? 0 : 36; + } + + unsigned getAlignmentForImplicitArgPtr() const { + return isAmdHsaOS() ? 8 : 4; + } + + unsigned getImplicitArgNumBytes(const MachineFunction &MF) const { + if (isMesaKernel(MF)) + return 16; + if (isAmdHsaOS() && isOpenCLEnv()) + return 32; + return 0; } unsigned getStackAlignment() const { @@ -289,6 +354,92 @@ public: bool enableSubRegLiveness() const override { return true; } + + /// \returns Number of execution units per compute unit supported by the + /// subtarget. + unsigned getEUsPerCU() const { + return 4; + } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given flat work group size. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { + if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 8; + return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16; + } + + /// \returns Maximum number of waves per compute unit supported by the + /// subtarget without any kind of limitation. + unsigned getMaxWavesPerCU() const { + return getMaxWavesPerEU() * getEUsPerCU(); + } + + /// \returns Maximum number of waves per compute unit supported by the + /// subtarget and limited by given flat work group size. + unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { + return getWavesPerWorkGroup(FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const { + return 1; + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget without any kind of limitation. + unsigned getMaxWavesPerEU() const { + if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 8; + // FIXME: Need to take scratch memory into account. + return 10; + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given flat work group size. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { + return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) / + getEUsPerCU(); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const { + return 1; + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const { + return 2048; + } + + /// \returns Number of waves per work group given the flat work group size. + unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { + return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize(); + } + + void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;} + bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;} + + /// \returns Subtarget's default pair of minimum/maximum flat work group sizes + /// for function \p F, or minimum/maximum flat work group sizes explicitly + /// requested using "amdgpu-flat-work-group-size" attribute attached to + /// function \p F. + /// + /// \returns Subtarget's default values if explicitly requested values cannot + /// be converted to integer, or violate subtarget's specifications. + std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; + + /// \returns Subtarget's default pair of minimum/maximum number of waves per + /// execution unit for function \p F, or minimum/maximum number of waves per + /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute + /// attached to function \p F. + /// + /// \returns Subtarget's default values if explicitly requested values cannot + /// be converted to integer, violate subtarget's specifications, or are not + /// compatible with minimum/maximum number of waves limited by flat work group + /// size, register usage, and/or lds usage. + std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const; }; class R600Subtarget final : public AMDGPUSubtarget { @@ -328,14 +479,14 @@ public: short getTexVTXClauseSize() const { return TexVTXClauseSize; } - - unsigned getStackEntrySize() const; }; class SISubtarget final : public AMDGPUSubtarget { public: enum { - FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 + // The closed Vulkan driver sets 96, which limits the wave count to 8 but + // doesn't spill SGPRs as much as when 80 is set. + FIXED_SGPR_COUNT_FOR_INIT_BUG = 96 }; private: @@ -378,10 +529,6 @@ public: bool isVGPRSpillingEnabled(const Function& F) const; - unsigned getAmdKernelCodeChipID() const; - - AMDGPU::IsaVersion getIsaVersion() const; - unsigned getMaxNumUserSGPRs() const { return 16; } @@ -394,8 +541,24 @@ public: return HasSMemRealTime; } - bool has16BitInsts() const { - return Has16BitInsts; + bool hasMovrel() const { + return HasMovrel; + } + + bool hasVGPRIndexMode() const { + return HasVGPRIndexMode; + } + + bool hasScalarCompareEq64() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + bool hasScalarStores() const { + return HasScalarStores; + } + + bool hasInv2PiInlineImm() const { + return HasInv2PiInlineImm; } bool enableSIScheduler() const { @@ -426,37 +589,28 @@ public: bool hasSGPRInitBug() const { return SGPRInitBug; } -}; - - -inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const { - if (getGeneration() >= SOUTHERN_ISLANDS) - return static_cast<const SISubtarget *>(this)->getInstrInfo(); - - return static_cast<const R600Subtarget *>(this)->getInstrInfo(); -} -inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const { - if (getGeneration() >= SOUTHERN_ISLANDS) - return static_cast<const SISubtarget *>(this)->getFrameLowering(); + bool has12DWordStoreHazard() const { + return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; + } - return static_cast<const R600Subtarget *>(this)->getFrameLowering(); -} + unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const; -inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const { - if (getGeneration() >= SOUTHERN_ISLANDS) - return static_cast<const SISubtarget *>(this)->getTargetLowering(); + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs + unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; - return static_cast<const R600Subtarget *>(this)->getTargetLowering(); -} + /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs + unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; -inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const { - if (getGeneration() >= SOUTHERN_ISLANDS) - return static_cast<const SISubtarget *>(this)->getRegisterInfo(); + /// \returns True if waitcnt instruction is needed before barrier instruction, + /// false otherwise. + bool needWaitcntBeforeBarrier() const { + return true; + } - return static_cast<const R600Subtarget *>(this)->getRegisterInfo(); -} + unsigned getMaxNumSGPRs() const; +}; -} // End namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H |