1 files changed, 204 insertions, 50 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 3fe61aa..0e3cb7d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -23,15 +23,22 @@
 #include "SIISelLowering.h"
 #include "SIFrameLowering.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <utility>
 
 #define GET_SUBTARGETINFO_HEADER
 #include "AMDGPUGenSubtargetInfo.inc"
 
 namespace llvm {
 
-class SIMachineFunctionInfo;
 class StringRef;
 
 class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
@@ -50,9 +57,13 @@ public:
     ISAVersion0_0_0,
     ISAVersion7_0_0,
     ISAVersion7_0_1,
+    ISAVersion7_0_2,
     ISAVersion8_0_0,
     ISAVersion8_0_1,
-    ISAVersion8_0_3
+    ISAVersion8_0_2,
+    ISAVersion8_0_3,
+    ISAVersion8_0_4,
+    ISAVersion8_1_0,
   };
 
 protected:
@@ -70,10 +81,12 @@ protected:
   bool HalfRate64Ops;
 
   // Dynamially set bits that enable features.
+  bool FP16Denormals;
   bool FP32Denormals;
   bool FP64Denormals;
   bool FPExceptions;
   bool FlatForGlobal;
+  bool UnalignedScratchAccess;
   bool UnalignedBufferAccess;
   bool EnableXNACK;
   bool DebuggerInsertNops;
@@ -97,40 +110,60 @@ protected:
   bool SGPRInitBug;
   bool HasSMemRealTime;
   bool Has16BitInsts;
+  bool HasMovrel;
+  bool HasVGPRIndexMode;
+  bool HasScalarStores;
+  bool HasInv2PiInlineImm;
   bool FlatAddressSpace;
   bool R600ALUInst;
   bool CaymanISA;
   bool CFALUBug;
   bool HasVertexCache;
   short TexVTXClauseSize;
+  bool ScalarizeGlobal;
 
   // Dummy feature to use for assembler in tablegen.
   bool FeatureDisable;
 
   InstrItineraryData InstrItins;
+  SelectionDAGTargetInfo TSInfo;
 
 public:
   AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                   const TargetMachine &TM);
-  virtual ~AMDGPUSubtarget();
+  ~AMDGPUSubtarget() override;
+
   AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
                                                    StringRef GPU, StringRef FS);
 
-  const AMDGPUInstrInfo *getInstrInfo() const override;
-  const AMDGPUFrameLowering *getFrameLowering() const override;
-  const AMDGPUTargetLowering *getTargetLowering() const override;
-  const AMDGPURegisterInfo *getRegisterInfo() const override;
+  const AMDGPUInstrInfo *getInstrInfo() const override = 0;
+  const AMDGPUFrameLowering *getFrameLowering() const override = 0;
+  const AMDGPUTargetLowering *getTargetLowering() const override = 0;
+  const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
 
   const InstrItineraryData *getInstrItineraryData() const override {
     return &InstrItins;
   }
 
+  // Nothing implemented, just prevent crashes on use.
+  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+    return &TSInfo;
+  }
+
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   bool isAmdHsaOS() const {
     return TargetTriple.getOS() == Triple::AMDHSA;
   }
 
+  bool isMesa3DOS() const {
+    return TargetTriple.getOS() == Triple::Mesa3D;
+  }
+
+  bool isOpenCLEnv() const {
+    return TargetTriple.getEnvironment() == Triple::OpenCL;
+  }
+
   Generation getGeneration() const {
     return Gen;
   }
@@ -151,6 +184,10 @@ public:
     return MaxPrivateElementSize;
   }
 
+  bool has16BitInsts() const {
+    return Has16BitInsts;
+  }
+
   bool hasHWFP64() const {
     return FP64;
   }
@@ -230,6 +267,10 @@ public:
     return DumpCode;
   }
 
+  bool enableIEEEBit(const MachineFunction &MF) const {
+    return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
+  }
+
   /// Return the amount of LDS that can be used that will not restrict the
   /// occupancy lower than WaveCount.
   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
@@ -238,6 +279,9 @@ public:
   /// the given LDS memory size is the only constraint.
   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
 
+  bool hasFP16Denormals() const {
+    return FP16Denormals;
+  }
 
   bool hasFP32Denormals() const {
     return FP32Denormals;
@@ -259,22 +303,43 @@ public:
     return UnalignedBufferAccess;
   }
 
+  bool hasUnalignedScratchAccess() const {
+    return UnalignedScratchAccess;
+  }
+
   bool isXNACKEnabled() const {
     return EnableXNACK;
   }
 
-  unsigned getMaxWavesPerCU() const {
-    if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
-      return 10;
+  bool isMesaKernel(const MachineFunction &MF) const {
+    return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
+  }
 
-    // FIXME: Not sure what this is for other subtagets.
-    return 8;
+  // Covers VS/PS/CS graphics shaders
+  bool isMesaGfxShader(const MachineFunction &MF) const {
+    return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
+  }
+
+  bool isAmdCodeObjectV2(const MachineFunction &MF) const {
+    return isAmdHsaOS() || isMesaKernel(MF);
   }
 
   /// \brief Returns the offset in bytes from the start of the input buffer
   ///        of the first explicit kernel argument.
-  unsigned getExplicitKernelArgOffset() const {
-    return isAmdHsaOS() ? 0 : 36;
+  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+    return isAmdCodeObjectV2(MF) ? 0 : 36;
+  }
+
+  unsigned getAlignmentForImplicitArgPtr() const {
+    return isAmdHsaOS() ? 8 : 4;
+  }
+
+  unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
+    if (isMesaKernel(MF))
+      return 16;
+    if (isAmdHsaOS() && isOpenCLEnv())
+      return 32;
+    return 0;
   }
 
   unsigned getStackAlignment() const {
@@ -289,6 +354,92 @@ public:
   bool enableSubRegLiveness() const override {
     return true;
   }
+
+  /// \returns Number of execution units per compute unit supported by the
+  /// subtarget.
+  unsigned getEUsPerCU() const {
+    return 4;
+  }
+
+  /// \returns Maximum number of work groups per compute unit supported by the
+  /// subtarget and limited by given flat work group size.
+  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
+    if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      return 8;
+    return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
+  }
+
+  /// \returns Maximum number of waves per compute unit supported by the
+  /// subtarget without any kind of limitation.
+  unsigned getMaxWavesPerCU() const {
+    return getMaxWavesPerEU() * getEUsPerCU();
+  }
+
+  /// \returns Maximum number of waves per compute unit supported by the
+  /// subtarget and limited by given flat work group size.
+  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
+    return getWavesPerWorkGroup(FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum number of waves per execution unit supported by the
+  /// subtarget.
+  unsigned getMinWavesPerEU() const {
+    return 1;
+  }
+
+  /// \returns Maximum number of waves per execution unit supported by the
+  /// subtarget without any kind of limitation.
+  unsigned getMaxWavesPerEU() const {
+    if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      return 8;
+    // FIXME: Need to take scratch memory into account.
+    return 10;
+  }
+
+  /// \returns Maximum number of waves per execution unit supported by the
+  /// subtarget and limited by given flat work group size.
+  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
+    return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
+      getEUsPerCU();
+  }
+
+  /// \returns Minimum flat work group size supported by the subtarget.
+  unsigned getMinFlatWorkGroupSize() const {
+    return 1;
+  }
+
+  /// \returns Maximum flat work group size supported by the subtarget.
+  unsigned getMaxFlatWorkGroupSize() const {
+    return 2048;
+  }
+
+  /// \returns Number of waves per work group given the flat work group size.
+  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
+    return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
+  }
+
+  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
+  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+
+  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
+  /// for function \p F, or minimum/maximum flat work group sizes explicitly
+  /// requested using "amdgpu-flat-work-group-size" attribute attached to
+  /// function \p F.
+  ///
+  /// \returns Subtarget's default values if explicitly requested values cannot
+  /// be converted to integer, or violate subtarget's specifications.
+  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
+
+  /// \returns Subtarget's default pair of minimum/maximum number of waves per
+  /// execution unit for function \p F, or minimum/maximum number of waves per
+  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
+  /// attached to function \p F.
+  ///
+  /// \returns Subtarget's default values if explicitly requested values cannot
+  /// be converted to integer, violate subtarget's specifications, or are not
+  /// compatible with minimum/maximum number of waves limited by flat work group
+  /// size, register usage, and/or lds usage.
+  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
 };
 
 class R600Subtarget final : public AMDGPUSubtarget {
@@ -328,14 +479,14 @@ public:
   short getTexVTXClauseSize() const {
     return TexVTXClauseSize;
   }
-
-  unsigned getStackEntrySize() const;
 };
 
 class SISubtarget final : public AMDGPUSubtarget {
 public:
   enum {
-    FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+    // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+    // doesn't spill SGPRs as much as when 80 is set.
+    FIXED_SGPR_COUNT_FOR_INIT_BUG = 96
   };
 
 private:
@@ -378,10 +529,6 @@ public:
 
   bool isVGPRSpillingEnabled(const Function& F) const;
 
-  unsigned getAmdKernelCodeChipID() const;
-
-  AMDGPU::IsaVersion getIsaVersion() const;
-
   unsigned getMaxNumUserSGPRs() const {
     return 16;
   }
@@ -394,8 +541,24 @@ public:
     return HasSMemRealTime;
   }
 
-  bool has16BitInsts() const {
-    return Has16BitInsts;
+  bool hasMovrel() const {
+    return HasMovrel;
+  }
+
+  bool hasVGPRIndexMode() const {
+    return HasVGPRIndexMode;
+  }
+
+  bool hasScalarCompareEq64() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
+  bool hasScalarStores() const {
+    return HasScalarStores;
+  }
+
+  bool hasInv2PiInlineImm() const {
+    return HasInv2PiInlineImm;
   }
 
   bool enableSIScheduler() const {
@@ -426,37 +589,28 @@ public:
   bool hasSGPRInitBug() const {
     return SGPRInitBug;
   }
-};
-
-
-inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
-  if (getGeneration() >= SOUTHERN_ISLANDS)
-    return static_cast<const SISubtarget *>(this)->getInstrInfo();
-
-  return static_cast<const R600Subtarget *>(this)->getInstrInfo();
-}
 
-inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const  {
-  if (getGeneration() >= SOUTHERN_ISLANDS)
-    return static_cast<const SISubtarget *>(this)->getFrameLowering();
+  bool has12DWordStoreHazard() const {
+    return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
+  }
 
-  return static_cast<const R600Subtarget *>(this)->getFrameLowering();
-}
+  unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
 
-inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const  {
-  if (getGeneration() >= SOUTHERN_ISLANDS)
-    return static_cast<const SISubtarget *>(this)->getTargetLowering();
+  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
+  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
 
-  return static_cast<const R600Subtarget *>(this)->getTargetLowering();
-}
+  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
+  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
 
-inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const  {
-  if (getGeneration() >= SOUTHERN_ISLANDS)
-    return static_cast<const SISubtarget *>(this)->getRegisterInfo();
+  /// \returns True if waitcnt instruction is needed before barrier instruction,
+  /// false otherwise.
+  bool needWaitcntBeforeBarrier() const {
+    return true;
+  }
 
-  return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
-}
+  unsigned getMaxNumSGPRs() const;
+};
 
-} // End namespace llvm
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H