Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp  205
1 file changed, 129 insertions(+), 76 deletions(-)
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index ecd46b9..a7c8166 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -20,17 +20,13 @@
using namespace llvm;
-static cl::opt<bool> EnableSpillSGPRToVGPR(
- "amdgpu-spill-sgpr-to-vgpr",
- cl::desc("Enable spilling SGPRs to VGPRs"),
- cl::ReallyHidden,
- cl::init(true));
-
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
- ScratchRSrcReg(AMDGPU::NoRegister),
- ScratchWaveOffsetReg(AMDGPU::NoRegister),
+ ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
+ ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
+ FrameOffsetReg(AMDGPU::FP_REG),
+ StackPtrOffsetReg(AMDGPU::SP_REG),
PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
DispatchPtrUserSGPR(AMDGPU::NoRegister),
QueuePtrUserSGPR(AMDGPU::NoRegister),
@@ -46,14 +42,17 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
+ WorkItemIDXVGPR(AMDGPU::NoRegister),
+ WorkItemIDYVGPR(AMDGPU::NoRegister),
+ WorkItemIDZVGPR(AMDGPU::NoRegister),
PSInputAddr(0),
+ PSInputEnable(0),
ReturnsVoid(true),
FlatWorkGroupSizes(0, 0),
WavesPerEU(0, 0),
DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
LDSWaveSpillSize(0),
- PSInputEna(0),
NumUserSGPRs(0),
NumSystemSGPRs(0),
HasSpilledSGPRs(false),
@@ -78,44 +77,83 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false),
- PrivateMemoryInputPtr(false) {
+ ImplicitBufferPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
+ FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
+ WavesPerEU = ST.getWavesPerEU(*F);
- PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
+ if (!isEntryFunction()) {
+ // Non-entry functions have no special inputs for now; they just get the
+ // fixed registers required for scratch access.
+ ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+ ScratchWaveOffsetReg = AMDGPU::SGPR4;
+ FrameOffsetReg = AMDGPU::SGPR5;
+ StackPtrOffsetReg = AMDGPU::SGPR32;
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ // FIXME: Not really a system SGPR.
+ PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+ }
- if (!AMDGPU::isShader(F->getCallingConv())) {
- KernargSegmentPtr = true;
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
+ KernargSegmentPtr = !F->arg_empty();
WorkGroupIDX = true;
WorkItemIDX = true;
+ } else if (CC == CallingConv::AMDGPU_PS) {
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
}
- if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
+ if (ST.debuggerEmitPrologue()) {
+ // Enable everything.
+ WorkGroupIDX = true;
WorkGroupIDY = true;
-
- if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
WorkGroupIDZ = true;
-
- if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
+ WorkItemIDX = true;
WorkItemIDY = true;
-
- if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
WorkItemIDZ = true;
+ } else {
+ if (F->hasFnAttribute("amdgpu-work-group-id-x"))
+ WorkGroupIDX = true;
- // X, XY, and XYZ are the only supported combinations, so make sure Y is
- // enabled if Z is.
- if (WorkItemIDZ)
- WorkItemIDY = true;
+ if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ WorkGroupIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ WorkGroupIDZ = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-x"))
+ WorkItemIDX = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ WorkItemIDY = true;
+ if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ WorkItemIDZ = true;
+ }
+
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
bool HasStackObjects = FrameInfo.hasStackObjects();
- if (HasStackObjects || MaySpill)
- PrivateSegmentWaveByteOffset = true;
+ if (isEntryFunction()) {
+ // X, XY, and XYZ are the only supported combinations, so make sure Y is
+ // enabled if Z is.
+ if (WorkItemIDZ)
+ WorkItemIDY = true;
+
+ if (HasStackObjects || MaySpill) {
+ PrivateSegmentWaveByteOffset = true;
- if (ST.isAmdCodeObjectV2(MF)) {
+ // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
+ (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
+ PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+ }
+ }
+
+ bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+ if (IsCOV2) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -129,18 +167,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
DispatchID = true;
} else if (ST.isMesaGfxShader(MF)) {
if (HasStackObjects || MaySpill)
- PrivateMemoryInputPtr = true;
+ ImplicitBufferPtr = true;
}
- // We don't need to worry about accessing spills with flat instructions.
- // TODO: On VI where we must use flat for global, we should be able to omit
- // this if it is never used for generic access.
- if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
- ST.isAmdHsaOS())
- FlatScratchInit = true;
+ if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
+ KernargSegmentPtr = true;
- FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
- WavesPerEU = ST.getWavesPerEU(*F);
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls that may require it before argument lowering.
+ if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
+ FlatScratchInit = true;
+ }
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
@@ -186,52 +224,67 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
return FlatScratchInitUserSGPR;
}
-unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
- PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
+ ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
NumUserSGPRs += 2;
- return PrivateMemoryPtrUserSGPR;
+ return ImplicitBufferPtrUserSGPR;
}
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
- MachineFunction *MF,
- unsigned FrameIndex,
- unsigned SubIdx) {
- if (!EnableSpillSGPRToVGPR)
- return SpilledReg();
-
- const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
-
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
- Offset += SubIdx * 4;
+/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
+ int FI) {
+ std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
- unsigned LaneVGPRIdx = Offset / (64 * 4);
- unsigned Lane = (Offset / 4) % 64;
+ // This has already been allocated.
+ if (!SpillLanes.empty())
+ return true;
- struct SpilledReg Spill;
- Spill.Lane = Lane;
-
- if (!LaneVGPRs.count(LaneVGPRIdx)) {
- unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
- *MF);
-
- if (LaneVGPR == AMDGPU::NoRegister)
- // We have no VGPRs left for spilling SGPRs.
- return Spill;
-
- LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
-
- // Add this register as live-in to all blocks to avoid the machine verifier
- // complaining about use of an undefined physical register.
- for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
- BI != BE; ++BI) {
- BI->addLiveIn(LaneVGPR);
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned WaveSize = ST.getWavefrontSize();
+
+ unsigned Size = FrameInfo.getObjectSize(FI);
+ assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
+ assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+
+ int NumLanes = Size / 4;
+
+ // Make sure to handle the case where a wide SGPR spill may span two
+ // VGPRs.
+ for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+ unsigned LaneVGPR;
+ unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
+
+ if (VGPRIndex == 0) {
+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ if (LaneVGPR == AMDGPU::NoRegister) {
+ // We have no VGPRs left for spilling SGPRs. Reset because we won't
+ // partially spill the SGPR to VGPRs.
+ SGPRToVGPRSpills.erase(FI);
+ NumVGPRSpillLanes -= I;
+ return false;
+ }
+
+ SpillVGPRs.push_back(LaneVGPR);
+
+ // Add this register as live-in to all blocks to avoid the machine verifier
+ // complaining about use of an undefined physical register.
+ for (MachineBasicBlock &BB : MF)
+ BB.addLiveIn(LaneVGPR);
+ } else {
+ LaneVGPR = SpillVGPRs.back();
}
+
+ SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
}
- Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
- return Spill;
+ return true;
+}
+
+void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
+ for (auto &R : SGPRToVGPRSpills)
+ MFI.RemoveStackObject(R.first);
}
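
The functional core of this diff is the new SGPR-to-VGPR spill bookkeeping. The old getSpilledReg derived the spill location from the frame object's byte offset (VGPR index = Offset / (64 * 4), lane = (Offset / 4) % 64); the new allocateSGPRSpillToVGPR instead hands out lanes from a dense running counter, WaveSize lanes per VGPR. A minimal standalone sketch of that scheme, assuming plain integers stand in for VGPRs and that the supply never runs out (the real code calls TRI->findUnusedRegister() and, on failure, erases the frame index so an SGPR is never partially spilled):

#include <cassert>
#include <map>
#include <utility>
#include <vector>

// Illustrative model only -- SpillLaneAllocator is not an LLVM type.
struct SpillLaneAllocator {
  static constexpr unsigned WaveSize = 64;  // spill lanes per VGPR
  unsigned NumVGPRSpillLanes = 0;           // running lane counter
  std::vector<unsigned> SpillVGPRs;         // VGPRs reserved so far
  // Frame index -> one (VGPR, lane) pair per 32-bit slice of the spill.
  std::map<int, std::vector<std::pair<unsigned, unsigned>>> Spills;

  void allocate(int FI, unsigned Size) {
    assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
    for (unsigned I = 0; I != Size / 4; ++I, ++NumVGPRSpillLanes) {
      unsigned Lane = NumVGPRSpillLanes % WaveSize;
      if (Lane == 0)  // first lane of a fresh VGPR: reserve a new one
        SpillVGPRs.push_back(static_cast<unsigned>(SpillVGPRs.size()));
      Spills[FI].emplace_back(SpillVGPRs.back(), Lane);
    }
  }
};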
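
A hypothetical driver for the sketch above, showing the case the "may span two VGPRs" comment in the diff is about:

int main() {
  SpillLaneAllocator A;
  // Fifteen 16-byte spills consume lanes 0..59 of the first VGPR ...
  for (int FI = 0; FI < 15; ++FI)
    A.allocate(FI, /*Size=*/16);
  // ... so this 32-byte spill lands in VGPR 0 lanes 60..63 and then
  // wraps to VGPR 1 lanes 0..3.
  A.allocate(15, /*Size=*/32);
  return 0;
}

Packing lanes sequentially per function, rather than deriving them from frame offsets, fills each reserved VGPR completely before claiming another, which is what lets a multi-register SGPR tuple cross from one lane VGPR into the next.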