summary | refs | log | tree | commit | diff | stats
path: root/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h')
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h 109
1 files changed, 87 insertions, 22 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 6fc8d18..4c7f38a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -15,14 +15,18 @@
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#include "AMDGPUMachineFunction.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include <array>
+#include <cassert>
#include <map>
+#include <utility>
namespace llvm {
-class MachineRegisterInfo;
-
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
explicit AMDGPUImagePseudoSourceValue() :
@@ -84,8 +88,16 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned ScratchRSrcReg;
unsigned ScratchWaveOffsetReg;
+ // This register holds the current function's offset, incremented from the
+ // kernel's scratch wave offset register. For an entry function, this is
+ // exactly the same as the ScratchWaveOffsetReg.
+ unsigned FrameOffsetReg;
+
+ // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
+ unsigned StackPtrOffsetReg;
+
// Input registers for non-HSA ABI
- unsigned PrivateMemoryPtrUserSGPR;
+ unsigned ImplicitBufferPtrUserSGPR;
// Input registers setup for the HSA ABI.
// User SGPRs in allocation order.
@@ -107,8 +119,15 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned WorkGroupInfoSystemSGPR;
unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
+ // VGPR inputs. These are always v0, v1 and v2 for entry functions.
+ unsigned WorkItemIDXVGPR;
+ unsigned WorkItemIDYVGPR;
+ unsigned WorkItemIDZVGPR;
+
// Graphics info.
unsigned PSInputAddr;
+ unsigned PSInputEnable;
+
bool ReturnsVoid;
// A pair of default/requested minimum/maximum flat work group sizes.
@@ -127,16 +146,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
AMDGPUBufferPseudoSourceValue BufferPSV;
AMDGPUImagePseudoSourceValue ImagePSV;
-public:
- // FIXME: Make private
+private:
unsigned LDSWaveSpillSize;
- unsigned PSInputEna;
- std::map<unsigned, unsigned> LaneVGPRs;
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
unsigned NumSystemSGPRs;
-private:
bool HasSpilledSGPRs;
bool HasSpilledVGPRs;
bool HasNonSpillStackObjects;
@@ -169,7 +184,7 @@ private:
// Private memory buffer
// Compute directly in sgpr[0:1]
// Other shaders indirect 64-bits at sgpr[0:1]
- bool PrivateMemoryInputPtr : 1;
+ bool ImplicitBufferPtr : 1;
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
@@ -182,19 +197,39 @@ private:
public:
struct SpilledReg {
- unsigned VGPR;
- int Lane;
+ unsigned VGPR = AMDGPU::NoRegister;
+ int Lane = -1;
+
+ SpilledReg() = default;
SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
- SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
+
bool hasLane() { return Lane != -1;}
bool hasReg() { return VGPR != AMDGPU::NoRegister;}
};
- // SIMachineFunctionInfo definition
+private:
+ // SGPR->VGPR spilling support.
+ typedef std::pair<unsigned, unsigned> SpillRegMask;
+
+ // For each frame index holding a spilled SGPR, track the VGPR and lane
+ // assigned to each of its subregisters.
+ DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
+ unsigned NumVGPRSpillLanes = 0;
+ SmallVector<unsigned, 2> SpillVGPRs;
+
+public:
SIMachineFunctionInfo(const MachineFunction &MF);
- SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
- unsigned SubIdx);
+
+ ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
+ auto I = SGPRToVGPRSpills.find(FrameIndex);
+ return (I == SGPRToVGPRSpills.end()) ?
+ ArrayRef<SpilledReg>() : makeArrayRef(I->second);
+ }
+
+ bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
+ void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
@@ -206,7 +241,7 @@ public:
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
unsigned addDispatchID(const SIRegisterInfo &TRI);
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
- unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
+ unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
unsigned addWorkGroupIDX() {
@@ -311,8 +346,8 @@ public:
return WorkItemIDZ;
}
- bool hasPrivateMemoryInputPtr() const {
- return PrivateMemoryInputPtr;
+ bool hasImplicitBufferPtr() const {
+ return ImplicitBufferPtr;
}
unsigned getNumUserSGPRs() const {
@@ -342,17 +377,35 @@ public:
return ScratchWaveOffsetReg;
}
+ unsigned getFrameOffsetReg() const {
+ return FrameOffsetReg;
+ }
+
+ void setStackPtrOffsetReg(unsigned Reg) {
+ StackPtrOffsetReg = Reg;
+ }
+
+ // Returns StackPtrOffsetReg. Note that the unset value for this is
+ // AMDGPU::SP_REG rather than NoRegister. This is mostly a workaround for MIR
+ // tests, where state that can't be directly computed from the function is
+ // not preserved in serialized MIR.
+ unsigned getStackPtrOffsetReg() const {
+ return StackPtrOffsetReg;
+ }
+
void setScratchWaveOffsetReg(unsigned Reg) {
assert(Reg != AMDGPU::NoRegister && "Should never be unset");
ScratchWaveOffsetReg = Reg;
+ if (isEntryFunction())
+ FrameOffsetReg = ScratchWaveOffsetReg;
}
unsigned getQueuePtrUserSGPR() const {
return QueuePtrUserSGPR;
}
- unsigned getPrivateMemoryPtrUserSGPR() const {
- return PrivateMemoryPtrUserSGPR;
+ unsigned getImplicitBufferPtrUserSGPR() const {
+ return ImplicitBufferPtrUserSGPR;
}
bool hasSpilledSGPRs() const {
@@ -399,6 +452,10 @@ public:
return PSInputAddr;
}
+ unsigned getPSInputEnable() const {
+ return PSInputEnable;
+ }
+
bool isPSInputAllocated(unsigned Index) const {
return PSInputAddr & (1 << Index);
}
@@ -407,6 +464,10 @@ public:
PSInputAddr |= 1 << Index;
}
+ void markPSInputEnabled(unsigned Index) {
+ PSInputEnable |= 1 << Index;
+ }
+
bool returnsVoid() const {
return ReturnsVoid;
}
@@ -503,6 +564,10 @@ public:
llvm_unreachable("unexpected dimension");
}
+ unsigned getLDSWaveSpillSize() const {
+ return LDSWaveSpillSize;
+ }
+
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
return &BufferPSV;
}
@@ -512,6 +577,6 @@ public:
}
};
-} // End namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
OpenPOWER on IntegriCloud