author     dim <dim@FreeBSD.org>    2016-01-06 20:01:02 +0000
committer  dim <dim@FreeBSD.org>    2016-01-06 20:01:02 +0000
commit     ff2ba393a56d9d99dcb76ceada542233db28af9a (patch)
tree       ea70b740d40cffe568a990c7aecd1acb5f83f786 /lib/Target/AMDGPU
parent     7c35321d839f2c4d0fc8510bfbd8954b07908b76 (diff)
Vendor import of llvm trunk r256945:
https://llvm.org/svn/llvm-project/llvm/trunk@256945
Diffstat (limited to 'lib/Target/AMDGPU')
 lib/Target/AMDGPU/AMDGPU.td                 |  5
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp      | 13
 lib/Target/AMDGPU/AMDGPUInstructions.td     | 35
 lib/Target/AMDGPU/AMDGPUSubtarget.cpp       |  1
 lib/Target/AMDGPU/AMDGPUSubtarget.h         |  5
 lib/Target/AMDGPU/CIInstructions.td         | 76
 lib/Target/AMDGPU/SIFrameLowering.cpp       | 86
 lib/Target/AMDGPU/SIInstrInfo.td            | 28
 lib/Target/AMDGPU/SIInstructions.td         |  4
 lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 11
 lib/Target/AMDGPU/SIRegisterInfo.cpp        | 43
 lib/Target/AMDGPU/VIInstructions.td         |  9
12 files changed, 153 insertions, 163 deletions
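Note: the AMDGPUAsmPrinter hunk in the diff below changes how implicitly used SGPRs are counted: the pair backing vcc is added whenever vcc, flat, or XNACK is in play, with two more SGPRs each for flat_scratch and xnack_mask. A minimal standalone sketch of that accounting follows; the free function and bool parameters are illustrative only, not the actual getSIProgramInfo() interface.

```cpp
// Sketch of the SGPR accounting introduced by this import (illustrative;
// in the real patch this logic lives inside AMDGPUAsmPrinter::getSIProgramInfo).
unsigned extraImplicitSGPRs(bool VCCUsed, bool FlatUsed, bool XNACKEnabled) {
  unsigned Extra = 0;
  if (VCCUsed || FlatUsed || XNACKEnabled) {
    Extra += 2;        // vcc occupies an aligned SGPR pair
    if (FlatUsed)
      Extra += 2;      // flat_scratch needs another pair
    if (XNACKEnabled)
      Extra += 2;      // xnack_mask needs a third pair
  }
  return Extra;
}
```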
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index d4af8d2..db869cf 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -118,6 +118,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "true", "Support flat address space">; +def FeatureXNACK : SubtargetFeature<"xnack", + "EnableXNACK", + "true", + "Enable XNACK support">; + def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", "EnableVGPRSpilling", "true", diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index ba71dc0..9c37902 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -417,13 +417,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } } - if (VCCUsed || FlatUsed) + if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) { MaxSGPR += 2; - if (FlatUsed) { - MaxSGPR += 2; - // 2 additional for VI+. - if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (FlatUsed) + MaxSGPR += 2; + + if (STM.isXNACKEnabled()) MaxSGPR += 2; } @@ -620,6 +620,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, if (MFI->hasDispatchPtr()) header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + if (STM.isXNACKEnabled()) + header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; + header.kernarg_segment_byte_size = MFI->ABIArgOffset; header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 11f6139..2a7ce6a 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -204,14 +204,6 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; -def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - -def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); }]>; @@ -243,14 +235,6 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; -def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - -def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); }]>; @@ -299,16 +283,6 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr), return isGlobalStore(dyn_cast<StoreSDNode>(N)); }]>; -def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr), - (truncstorei8 node:$val, node:$ptr), [{ - return isFlatStore(dyn_cast<StoreSDNode>(N)); -}]>; - -def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr), - (truncstorei16 node:$val, node:$ptr), [{ - return isFlatStore(dyn_cast<StoreSDNode>(N)); -}]>; - def local_store : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); @@ -385,15 +359,6 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> { defm atomic_cmp_swap : 
AtomicCmpSwapLocal <atomic_cmp_swap>; -def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - -def flat_store : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return isFlatStore(dyn_cast<StoreSDNode>(N)); -}]>; - def mskor_flat : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 44e0c47..c6af5b9 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -73,6 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), + EnableXNACK(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 9c7bb88..d371227 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -76,6 +76,7 @@ private: bool EnableIfCvt; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; + bool EnableXNACK; unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; @@ -290,6 +291,10 @@ public: } bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; + bool isXNACKEnabled() const { + return EnableXNACK; + } + unsigned getMaxWavesPerCU() const { if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) return 10; diff --git a/lib/Target/AMDGPU/CIInstructions.td b/lib/Target/AMDGPU/CIInstructions.td index 88a090d..c543814 100644 --- a/lib/Target/AMDGPU/CIInstructions.td +++ b/lib/Target/AMDGPU/CIInstructions.td @@ -264,42 +264,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC < } // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst -//===----------------------------------------------------------------------===// -// Flat Patterns -//===----------------------------------------------------------------------===// - -let Predicates = [HasFlatAddressSpace] in { - -class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt, - PatFrag flat_ld> : - Pat <(vt (flat_ld i64:$ptr)), - (Instr_ADDR64 $ptr, 0, 0, 0) ->; - -def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>; - -class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> : - Pat <(st vt:$value, i64:$ptr), - (Instr $value, $ptr, 0, 0, 0) - >; - -def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>; -def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>; -def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>; -def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>; -def : 
FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>; -def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>; - -} // End HasFlatAddressSpace predicate - let Predicates = [isCI] in { // Convert (x - floor(x)) to fract(x) @@ -320,20 +284,10 @@ def : Pat < //===----------------------------------------------------------------------===// -// Patterns to generate flat for global +// Flat Patterns //===----------------------------------------------------------------------===// -def useFlatForGlobal : Predicate < - "Subtarget->useFlatForGlobal() || " - "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">; - -let Predicates = [useFlatForGlobal] in { - -// 1. Offset as 20bit DWORD immediate -def : Pat < - (SIload_constant v4i32:$sbase, IMM20bit:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) ->; +let Predicates = [isCIVI] in { // Patterns for global loads with no offset class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < @@ -341,24 +295,24 @@ class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < (inst $addr, 0, 0, 0) >; -def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>; -def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>; -def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>; -def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>; -def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>; -def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>; -def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>; +def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>; +def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>; +def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>; +def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>; +def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>; +def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>; +def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>; class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < (node vt:$data, i64:$addr), (inst $data, $addr, 0, 0, 0) >; -def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>; -def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>; -def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>; -def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>; -def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>; +def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>; +def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>; +def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>; +def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>; +def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>; class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < (vt (node i64:$addr, vt:$data)), @@ -376,4 +330,4 @@ def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>; -} // End Predicates = [useFlatForGlobal] +} // End Predicates = [isCIVI] diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 6b3c81c..7d20509 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -105,51 +105,53 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, MBB.addLiveIn(PreloadedPrivateBufferReg); } - // We reserved the last registers for this. 
Shift it down to the end of those - // which were actually used. - // - // FIXME: It might be safer to use a pseudoregister before replacement. - - // FIXME: We should be able to eliminate unused input registers. We only - // cannot do this for the resources required for scratch access. For now we - // skip over user SGPRs and may leave unused holes. - - // We find the resource first because it has an alignment requirement. - if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) { - MachineRegisterInfo &MRI = MF.getRegInfo(); - - unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4; - // Skip the last 2 elements because the last one is reserved for VCC, and - // this is the 2nd to last element already. - for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) { - // Pick the first unallocated one. Make sure we don't clobber the other - // reserved input we needed. - if (!MRI.isPhysRegUsed(Reg)) { - assert(MRI.isAllocatable(Reg)); - MRI.replaceRegWith(ScratchRsrcReg, Reg); - ScratchRsrcReg = Reg; - MFI->setScratchRSrcReg(ScratchRsrcReg); - break; + if (!ST.hasSGPRInitBug()) { + // We reserved the last registers for this. Shift it down to the end of those + // which were actually used. + // + // FIXME: It might be safer to use a pseudoregister before replacement. + + // FIXME: We should be able to eliminate unused input registers. We only + // cannot do this for the resources required for scratch access. For now we + // skip over user SGPRs and may leave unused holes. + + // We find the resource first because it has an alignment requirement. + if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4; + // Skip the last 2 elements because the last one is reserved for VCC, and + // this is the 2nd to last element already. + for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) { + // Pick the first unallocated one. Make sure we don't clobber the other + // reserved input we needed. + if (!MRI.isPhysRegUsed(Reg)) { + assert(MRI.isAllocatable(Reg)); + MRI.replaceRegWith(ScratchRsrcReg, Reg); + ScratchRsrcReg = Reg; + MFI->setScratchRSrcReg(ScratchRsrcReg); + break; + } } } - } - if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) { - MachineRegisterInfo &MRI = MF.getRegInfo(); - // Skip the last 2 elements because the last one is reserved for VCC, and - // this is the 2nd to last element already. - unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); - for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) { - // Pick the first unallocated SGPR. Be careful not to pick an alias of the - // scratch descriptor, since we haven’t added its uses yet. - if (!MRI.isPhysRegUsed(Reg)) { - assert(MRI.isAllocatable(Reg) && - !TRI->isSubRegisterEq(ScratchRsrcReg, Reg)); - - MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); - ScratchWaveOffsetReg = Reg; - MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg); - break; + if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Skip the last 2 elements because the last one is reserved for VCC, and + // this is the 2nd to last element already. + unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); + for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) { + // Pick the first unallocated SGPR. 
Be careful not to pick an alias of the + // scratch descriptor, since we haven’t added its uses yet. + if (!MRI.isPhysRegUsed(Reg)) { + assert(MRI.isAllocatable(Reg) && + !TRI->isSubRegisterEq(ScratchRsrcReg, Reg)); + + MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); + ScratchWaveOffsetReg = Reg; + MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg); + break; + } } } } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 10f2adde..8735277 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -134,6 +134,34 @@ def SIconstdata_ptr : SDNode< SDTCisVT<0, i64>]> >; +//===----------------------------------------------------------------------===// +// PatFrags for FLAT instructions +//===----------------------------------------------------------------------===// + +class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr), + (ld node:$ptr), [{ + return isFlatLoad(dyn_cast<LoadSDNode>(N)) || + isGlobalLoad(dyn_cast<LoadSDNode>(N)) || + isConstantLoad(cast<LoadSDNode>(N), -1); +}]>; + +def flat_load : flat_ld <load>; +def flat_az_extloadi8 : flat_ld <az_extloadi8>; +def flat_sextloadi8 : flat_ld <sextloadi8>; +def flat_az_extloadi16 : flat_ld <az_extloadi16>; +def flat_sextloadi16 : flat_ld <sextloadi16>; + +class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + return isFlatStore(dyn_cast<StoreSDNode>(N)) || + isGlobalStore(dyn_cast<StoreSDNode>(N)); +}]>; + +def flat_store: flat_st <store>; +def flat_truncstorei8 : flat_st <truncstorei8>; +def flat_truncstorei16 : flat_st <truncstorei16>; + + def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isGlobalLoad(cast<LoadSDNode>(N)) || isConstantLoad(cast<LoadSDNode>(N), -1); diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 6f653c7..b7df058 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -59,8 +59,6 @@ defm EXP : EXP_m; // SMRD Instructions //===----------------------------------------------------------------------===// -let mayLoad = 1 in { - // We are using the SGPR_32 and not the SReg_32 register class for 32-bit // SMRD instructions, because the SGPR_32 register class does not include M0 // and writing to M0 from an SMRD instruction will hang the GPU. @@ -90,8 +88,6 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512 >; -} // mayLoad = 1 - //def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv", diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 935aad4..bf15516 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -156,6 +156,17 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( if (!LaneVGPRs.count(LaneVGPRIdx)) { unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); + + if (LaneVGPR == AMDGPU::NoRegister) { + LLVMContext &Ctx = MF->getFunction()->getContext(); + Ctx.emitError("Ran out of VGPRs for spilling SGPR"); + + // When compiling from inside Mesa, the compilation continues. + // Select an arbitrary register to avoid triggering assertions + // during subsequent passes. 
+ LaneVGPR = AMDGPU::VGPR0; + } + LaneVGPRs[LaneVGPRIdx] = LaneVGPR; // Add this register as live-in to all blocks to avoid machine verifer diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 3cdffef..2afa009 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -37,13 +37,17 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; + if (ST.isXNACKEnabled()) + BaseIdx -= 4; + unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the - // next sgpr128 down. + // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and + // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down + // either way. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -54,13 +58,25 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { - unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + unsigned Idx; + + if (!ST.isXNACKEnabled()) + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + else + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; + return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; + if (!ST.isXNACKEnabled()) { + // Next register before reservations for flat_scr and vcc. + return AMDGPU::SGPR97; + } else { + // Next register before reservations for flat_scr, xnack_mask, vcc, + // and scratch resource. + return AMDGPU::SGPR91; + } } return AMDGPU::SGPR95; @@ -86,6 +102,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // for VCC/FLAT_SCR. reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); + + if (ST.isXNACKEnabled()) + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); } // Tonga and Iceland can only allocate a fixed number of SGPRs due @@ -93,9 +112,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - // Assume XNACK_MASK is unused. 
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; + if (ST.isXNACKEnabled()) + Limit -= 2; + for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); @@ -282,11 +303,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); - if (Spill.VGPR == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF->getFunction()->getContext(); - Ctx.emitError("Ran out of VGPRs for spilling SGPR"); - } - BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), Spill.VGPR) @@ -315,11 +331,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); - if (Spill.VGPR == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF->getFunction()->getContext(); - Ctx.emitError("Ran out of VGPRs for spilling SGPR"); - } - BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg) diff --git a/lib/Target/AMDGPU/VIInstructions.td b/lib/Target/AMDGPU/VIInstructions.td index 20a026a..1a7801c 100644 --- a/lib/Target/AMDGPU/VIInstructions.td +++ b/lib/Target/AMDGPU/VIInstructions.td @@ -101,3 +101,12 @@ def S_DCACHE_WB_VOL : SMEM_Inval <0x23, } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI +let Predicates = [isVI] in { + +// 1. Offset as 20bit DWORD immediate +def : Pat < + (SIload_constant v4i32:$sbase, IMM20bit:$offset), + (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) +>; + +} // End Predicates = [isVI] |
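Note: the SIRegisterInfo changes above shift the reserved scratch-wave-offset SGPR down when XNACK is enabled, because xnack_mask now claims an extra SGPR pair below vcc. A hedged sketch of the resulting VI-class layout (without the SGPR init bug), using raw register numbers rather than LLVM's register enums; the helper itself is hypothetical:

```cpp
// Hypothetical helper mirroring the VI case of
// SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg; returns a raw SGPR number.
unsigned waveByteOffsetSGPR(bool XNACKEnabled) {
  if (!XNACKEnabled) {
    // s[98:99] = flat_scratch, s[100:101] = vcc -> s97 is the next free SGPR.
    return 97;
  }
  // s[96:97] = flat_scratch, s[98:99] = xnack_mask, s[100:101] = vcc,
  // s[92:95] = scratch resource descriptor -> s91 is the next free SGPR.
  return 91;
}
```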