summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/R600
diff options
context:
space:
mode:
authordim <dim@FreeBSD.org>2015-01-31 21:57:38 +0000
committerdim <dim@FreeBSD.org>2015-01-31 21:57:38 +0000
commitc9d63888fe4bf61cab2ca02db39caf47711f401d (patch)
treeaeab28fc1964cf0e6ba5c8157bb158c25a660444 /contrib/llvm/lib/Target/R600
parentde7d9ba1a12bd2fffbea1f10e14038dfdbf7e5bb (diff)
downloadFreeBSD-src-c9d63888fe4bf61cab2ca02db39caf47711f401d.zip
FreeBSD-src-c9d63888fe4bf61cab2ca02db39caf47711f401d.tar.gz
Merge llvm 3.6.0rc2 from ^/vendor/llvm/dist, merge clang 3.6.0rc2 from
^/vendor/clang/dist, resolve conflicts, and cleanup patches.
Diffstat (limited to 'contrib/llvm/lib/Target/R600')
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPU.h6
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPU.td5
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp19
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp59
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp3
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp33
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h5
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp35
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h14
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp26
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h16
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp2
-rw-r--r--contrib/llvm/lib/Target/R600/SIDefines.h4
-rw-r--r--contrib/llvm/lib/Target/R600/SIISelLowering.cpp6
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrFormats.td36
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.cpp48
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.h1
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.td102
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstructions.td92
-rw-r--r--contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h1
-rw-r--r--contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp100
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp97
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.h14
-rw-r--r--contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp9
-rw-r--r--contrib/llvm/lib/Target/R600/VIInstructions.td29
25 files changed, 454 insertions, 308 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.h b/contrib/llvm/lib/Target/R600/AMDGPU.h
index fcf9eca..c660055 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPU.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.h
@@ -77,7 +77,11 @@ extern Target TheGCNTarget;
namespace AMDGPU {
enum TargetIndex {
- TI_CONSTDATA_START
+ TI_CONSTDATA_START,
+ TI_SCRATCH_RSRC_DWORD0,
+ TI_SCRATCH_RSRC_DWORD1,
+ TI_SCRATCH_RSRC_DWORD2,
+ TI_SCRATCH_RSRC_DWORD3
};
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.td b/contrib/llvm/lib/Target/R600/AMDGPU.td
index 8a5ca61..1df4448 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPU.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.td
@@ -92,6 +92,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"true",
"Support flat address space">;
+def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
+ "EnableVGPRSpilling",
+ "true",
+ "Enable spilling of VGPRs to scratch memory">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 624f391..6185e36 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -116,7 +116,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.isAmdHsaOS()) {
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
getSIProgramInfo(KernelInfo, MF);
EmitAmdKernelCodeT(MF, KernelInfo);
OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1));
@@ -421,6 +420,7 @@ static unsigned getRsrcReg(unsigned ShaderType) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
@@ -441,6 +441,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer.EmitIntValue(RsrcReg, 4);
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
+ if (STM.isVGPRSpillingEnabled(MFI)) {
+ OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
+ OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+ }
}
if (MFI->getShaderType() == ShaderType::PIXEL) {
@@ -504,6 +508,19 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.wavefront_size = STM.getWavefrontSize();
+ const MCSectionELF *VersionSection = OutContext.getELFSection(".hsa.version",
+ ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(VersionSection);
+ OutStreamer.EmitBytes(Twine("HSA Code Unit:" +
+ Twine(header.hsail_version_major) + "." +
+ Twine(header.hsail_version_minor) + ":" +
+ "AMD:" +
+ Twine(header.amd_code_version_major) + "." +
+ Twine(header.amd_code_version_minor) + ":" +
+ "GFX8.1:0").str());
+
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+
if (isVerbose()) {
OutStreamer.emitRawComment("amd_code_version_major = " +
Twine(header.amd_code_version_major), false);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index eaa506d..15112c7 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -417,6 +417,28 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
N->getValueType(0), Ops);
}
+ case ISD::LOAD: {
+ // To simplify the TableGen patters, we replace all i64 loads with
+ // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
+ // during DAG legalization, however, so places (ExpandUnalignedLoad)
+ // in the DAG legalizer assume that if i64 is legal, so doing this
+ // promotion early can cause problems.
+ EVT VT = N->getValueType(0);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
+
+ SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
+ LD->getBasePtr(), LD->getMemOperand());
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ MVT::i64, NewLoad);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
+ SelectCode(NewLoad.getNode());
+ N = BitCast.getNode();
+ break;
+ }
+
case AMDGPUISD::REGISTER_LOAD: {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
@@ -962,16 +984,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
- unsigned ScratchPtrReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
unsigned ScratchOffsetReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
ScratchOffsetReg, MVT::i32);
+ SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
+ SDValue ScratchRsrcDword0 =
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
- SDValue ScratchPtr =
- CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
- MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
+ SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
+ SDValue ScratchRsrcDword1 =
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
+
+ const SDValue RsrcOps[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ ScratchRsrcDword0,
+ CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ ScratchRsrcDword1,
+ CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ };
+ SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::v2i32, RsrcOps), 0);
Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
@@ -988,22 +1021,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
}
}
- // (add FI, n0)
- if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
- isa<FrameIndexSDNode>(Addr.getOperand(0))) {
- VAddr = Addr.getOperand(1);
- ImmOffset = Addr.getOperand(0);
- return true;
- }
-
- // (FI)
- if (isa<FrameIndexSDNode>(Addr)) {
- VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
- CurDAG->getConstant(0, MVT::i32)), 0);
- ImmOffset = Addr;
- return true;
- }
-
// (node)
VAddr = Addr;
ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index 206050d..2adcdf1 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -187,9 +187,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
- setOperationAction(ISD::LOAD, MVT::i64, Promote);
- AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
-
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
index 5beaa68..e34a7b7 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -341,8 +341,39 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
// instead.
namespace llvm {
namespace AMDGPU {
-int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
}
}
}
+
+// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
+enum SISubtarget {
+ SI = 0,
+ VI = 1
+};
+
+enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
+ switch (Gen) {
+ default:
+ return SI;
+ case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+ return VI;
+ }
+}
+
+int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
+ int MCOp = AMDGPU::getMCOpcode(Opcode,
+ AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
+
+ // -1 means that Opcode is already a native instruction.
+ if (MCOp == -1)
+ return Opcode;
+
+ // (uint16_t)-1 means that Opcode is a pseudo instruction that has
+ // no encoding in the given subtarget generation.
+ if (MCOp == (uint16_t)-1)
+ return -1;
+
+ return MCOp;
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
index da9833d..e28ce0f 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
@@ -135,6 +135,11 @@ public:
bool isRegisterStore(const MachineInstr &MI) const;
bool isRegisterLoad(const MachineInstr &MI) const;
+ /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
+ /// Return -1 if the target-specific opcode for the pseudo instruction does
+ /// not exist. If Opcode is not a pseudo instruction, this is identity.
+ int pseudoToMCOpcode(int Opcode) const;
+
//===---------------------------------------------------------------------===//
// Pure virtual funtions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
index 1995ef2..03aa32d 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -39,29 +40,17 @@ AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
Ctx(ctx), ST(st)
{ }
-enum AMDGPUMCInstLower::SISubtarget
-AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) const {
- switch (Gen) {
- default:
- return AMDGPUMCInstLower::SI;
- case AMDGPUSubtarget::VOLCANIC_ISLANDS:
- return AMDGPUMCInstLower::VI;
- }
-}
-
-unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const {
-
- int MCOpcode = AMDGPU::getMCOpcode(MIOpcode,
- AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
- if (MCOpcode == -1)
- MCOpcode = MIOpcode;
+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- return MCOpcode;
-}
+ int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
-void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ if (MCOpcode == -1) {
+ LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
+ C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
+ "a target-specific version: " + Twine(MI->getOpcode()));
+ }
- OutMI.setOpcode(getMCOpcode(MI->getOpcode()));
+ OutMI.setOpcode(MCOpcode);
for (const MachineOperand &MO : MI->explicit_operands()) {
MCOperand MCOp;
@@ -91,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::CreateExpr(Expr);
break;
}
+ case MachineOperand::MO_ExternalSymbol: {
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ MCOp = MCOperand::CreateExpr(Expr);
+ break;
+ }
}
OutMI.addOperand(MCOp);
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h
index 0ae4d11..d322fe0 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h
@@ -19,23 +19,9 @@ class MCContext;
class MCInst;
class AMDGPUMCInstLower {
-
- // This must be kept in sync with the SISubtarget class in SIInstrInfo.td
- enum SISubtarget {
- SI = 0,
- VI = 1
- };
-
MCContext &Ctx;
const AMDGPUSubtarget &ST;
- /// Convert a member of the AMDGPUSubtarget::Generation enum to the
- /// SISubtarget enum.
- enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) const;
-
- /// Get the MC opcode for this MachineInstr.
- unsigned getMCOpcode(unsigned MIOpcode) const;
-
public:
AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
index 597e558..b1c7498 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -18,7 +18,9 @@
#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;
@@ -78,6 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+ EnableVGPRSpilling(false),
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
@@ -113,3 +116,26 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
case SEA_ISLANDS: return 12;
}
}
+
+bool AMDGPUSubtarget::isVGPRSpillingEnabled(
+ const SIMachineFunctionInfo *MFI) const {
+ return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
+}
+
+void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ if (getGeneration() >= SOUTHERN_ISLANDS) {
+
+ // Track register pressure so the scheduler can try to decrease
+ // pressure once register usage is above the threshold defined by
+ // SIRegisterInfo::getRegPressureSetLimit()
+ Policy.ShouldTrackPressure = true;
+
+ // Enabling both top down and bottom up scheduling seems to give us less
+ // register spills than just using one of these approaches on its own.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
index 90179d7..566b45c 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
@@ -30,6 +30,8 @@
namespace llvm {
+class SIMachineFunctionInfo;
+
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
public:
@@ -63,6 +65,7 @@ private:
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
+ bool EnableVGPRSpilling;
const DataLayout DL;
AMDGPUFrameLowering FrameLowering;
@@ -206,6 +209,10 @@ public:
return getGeneration() <= NORTHERN_ISLANDS;
}
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin, MachineInstr *end,
+ unsigned NumRegionInstrs) const override;
+
// Helper functions to simplify if statements
bool isTargetELF() const {
return false;
@@ -224,6 +231,15 @@ public:
bool isAmdHsaOS() const {
return TargetTriple.getOS() == Triple::AMDHSA;
}
+ bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
+
+ unsigned getMaxWavesPerCU() const {
+ if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 10;
+
+ // FIXME: Not sure what this is for other subtagets.
+ llvm_unreachable("do not know max waves per CU for this subtarget.");
+ }
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index d0c634f..5fb311b 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -29,7 +29,7 @@ public:
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
}
- void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override {
diff --git a/contrib/llvm/lib/Target/R600/SIDefines.h b/contrib/llvm/lib/Target/R600/SIDefines.h
index 73a9c73..7601794 100644
--- a/contrib/llvm/lib/Target/R600/SIDefines.h
+++ b/contrib/llvm/lib/Target/R600/SIDefines.h
@@ -163,4 +163,8 @@ namespace SIOutMods {
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
+#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+
+
#endif
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
index 0a3fa2f..6b2ea06 100644
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -588,6 +588,12 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
+
+ if (Info->getShaderType() != ShaderType::COMPUTE) {
+ unsigned ScratchIdx = CCInfo.getFirstUnallocated(
+ AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs());
+ Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
+ }
return Chain;
}
diff --git a/contrib/llvm/lib/Target/R600/SIInstrFormats.td b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
index 99a1df3..09c0cbe 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
@@ -85,49 +85,41 @@ class Enc64 {
let Uses = [EXEC] in {
-class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- InstSI <(outs VCCReg:$dst), ins, asm, pattern> {
+class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
- let DisableEncoding = "$dst";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
- let VOPC = 1;
let VALU = 1;
+}
+
+class VOPCCommon <dag ins, string asm, list<dag> pattern> :
+ VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> {
+
+ let DisableEncoding = "$dst";
+ let VOPC = 1;
let Size = 4;
}
class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
+ VOPAnyCommon <outs, ins, asm, pattern> {
+
let VOP1 = 1;
- let VALU = 1;
let Size = 4;
}
class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
+ VOPAnyCommon <outs, ins, asm, pattern> {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
let VOP2 = 1;
- let VALU = 1;
let Size = 4;
}
class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
+ VOPAnyCommon <outs, ins, asm, pattern> {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
// Using complex patterns gives VOP3 patterns a very high complexity rating,
// but standalone patterns are almost always prefered, so we need to adjust the
// priority lower. The goal is to use a high number to reduce complexity to
@@ -135,8 +127,6 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let AddedComplexity = -1000;
let VOP3 = 1;
- let VALU = 1;
-
int Size = 8;
}
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
index 1a4c0d4..80b560e 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -430,15 +430,6 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
return AMDGPU::COPY;
}
-static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
-
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-
- // FIXME: Implement spilling for other shader types.
- return MFI->getShaderType() == ShaderType::COMPUTE;
-
-}
-
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
@@ -462,7 +453,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
}
- } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+ } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
MFI->setHasSpilledVGPRs();
switch(RC->getSize() * 8) {
@@ -482,7 +473,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex)
// Place-holder registers, these will be filled in by
// SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
} else {
LLVMContext &Ctx = MF->getFunction()->getContext();
@@ -499,6 +490,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
int Opcode = -1;
@@ -511,7 +503,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
}
- } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+ } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
switch(RC->getSize() * 8) {
case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
@@ -528,7 +520,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex)
// Place-holder registers, these will be filled in by
// SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
} else {
@@ -615,7 +607,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
.addImm(-1)
.addImm(0);
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e32),
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
TIDReg)
.addImm(-1)
.addReg(TIDReg);
@@ -1053,7 +1045,11 @@ bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
}
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
- return AMDGPU::getVOPe32(Opcode) != -1;
+ int Op32 = AMDGPU::getVOPe32(Opcode);
+ if (Op32 == -1)
+ return false;
+
+ return pseudoToMCOpcode(Op32) != -1;
}
bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
@@ -1126,12 +1122,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}
switch (Desc.OpInfo[i].OperandType) {
- case MCOI::OPERAND_REGISTER: {
- if (MI->getOperand(i).isImm() &&
- !isImmOperandLegal(MI, i, MI->getOperand(i))) {
- ErrInfo = "Illegal immediate value for operand.";
- return false;
- }
+ case MCOI::OPERAND_REGISTER:
+ if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
+ ErrInfo = "Illegal immediate value for operand.";
+ return false;
+ }
+ break;
+ case AMDGPU::OPERAND_REG_IMM32:
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C:
+ if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
+ ErrInfo = "Illegal immediate value for operand.";
+ return false;
}
break;
case MCOI::OPERAND_IMMEDIATE:
@@ -1287,7 +1289,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
- case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
+ case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
}
@@ -2278,7 +2280,7 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
MachineOperand &Dest = Inst->getOperand(0);
MachineOperand &Src = Inst->getOperand(1);
- const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
+ const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
const TargetRegisterClass *SrcRC = Src.isReg() ?
MRI.getRegClass(Src.getReg()) :
&AMDGPU::SGPR_32RegClass;
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.h b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
index f766dc8..28cd27d 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
@@ -325,7 +325,6 @@ namespace AMDGPU {
int getVOPe32(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
- int getMCOpcode(uint16_t Opcode, unsigned Gen);
int getAddr64Inst(uint16_t Opcode);
int getAtomicRetOp(uint16_t Opcode);
int getAtomicNoRetOp(uint16_t Opcode);
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
index 7cc9588..175e11d 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
@@ -36,6 +36,12 @@ class vop2 <bits<6> si, bits<6> vi = si> : vop {
field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
}
+// Specify a VOP2 opcode for SI and VOP3 opcode for VI
+// that doesn't have VOP2 encoding on VI
+class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
+ let VI3 = vi;
+}
+
class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
let SI3 = si;
let VI3 = vi;
@@ -57,7 +63,7 @@ class sopk <bits<5> si, bits<5> vi = si> {
}
// Execpt for the NONE field, this must be kept in sync with the SISubtarget enum
-// in AMDGPUMCInstLower.h
+// in AMDGPUInstrInfo.cpp
def SISubtarget {
int NONE = -1;
int SI = 0;
@@ -731,7 +737,7 @@ class getAsm32 <int NumSrcArgs> {
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <int NumSrcArgs, bit HasModifiers> {
- string src0 = "$src0_modifiers,";
+ string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
" $src1_modifiers,"));
@@ -848,6 +854,16 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
let isPseudo = 1;
}
+multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, string revOpSI> {
+ def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
+
+ def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
+ VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+}
+
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, string revOpSI, string revOpVI> {
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
@@ -889,16 +905,6 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3e_vi <op>,
SIMCInstr <opName#"_e64", SISubtarget.VI>;
-// VI only instruction
-class VOP3_vi <bits<10> op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern, int NumSrcArgs, bit HasMods = 1> :
- VOP3Common <outs, ins, asm, pattern>,
- VOP <opName>,
- VOP3e_vi <op>,
- VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
- !if(!eq(NumSrcArgs, 2), 0, 1),
- HasMods>;
-
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
@@ -998,6 +1004,23 @@ multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
}
}
+// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
+multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern = []> {
+ let isPseudo = 1 in {
+ def "" : VOPAnyCommon <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE>;
+ }
+
+ def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
+ SIMCInstr <opName, SISubtarget.SI>;
+
+ def _vi : VOP3Common <outs, ins, asm, []>,
+ VOP3e_vi <op.VI3>,
+ VOP3DisableFields <1, 0, 0>,
+ SIMCInstr <opName, SISubtarget.VI>;
+}
+
multiclass VOP1_Helper <vop1 op, string opName, dag outs,
dag ins32, string asm32, list<dag> pat32,
dag ins64, string asm64, list<dag> pat64,
@@ -1089,6 +1112,33 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
revOp, P.HasModifiers
>;
+// A VOP2 instruction that is VOP3-only on VI.
+multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ string revOpSI, string revOpVI, bit HasMods> {
+ defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>;
+
+ defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
+ revOpSI, revOpVI, HasMods>;
+}
+
+multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOpSI = opName, string revOpVI = revOpSI>
+ : VOP2_VI3_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ revOpSI, revOpVI, P.HasModifiers
+>;
+
class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
@@ -1224,34 +1274,6 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
P.NumSrcArgs, P.HasModifiers
>;
-class VOP3InstVI <bits<10> op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag> : VOP3_vi <
- op, opName#"_vi", P.Outs, P.Ins64, opName#P.Asm64,
- !if(!eq(P.NumSrcArgs, 3),
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
- P.Src2VT:$src2))]),
- !if(!eq(P.NumSrcArgs, 2),
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
- /* P.NumSrcArgs == 1 */,
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
- P.NumSrcArgs, P.HasModifiers
->;
-
multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
string opName, list<dag> pattern> :
VOP3b_2_m <
diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td
index e05b6bb..4b1a846 100644
--- a/contrib/llvm/lib/Target/R600/SIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/SIInstructions.td
@@ -1525,25 +1525,25 @@ defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-def V_READLANE_B32 : VOP2 <
- 0x00000001,
+defm V_READLANE_B32 : VOP2SI_3VI_m <
+ vop3 <0x001, 0x289>,
+ "v_readlane_b32",
(outs SReg_32:$vdst),
(ins VGPR_32:$src0, SSrc_32:$vsrc1),
- "v_readlane_b32 $vdst, $src0, $vsrc1",
- []
+ "v_readlane_b32 $vdst, $src0, $vsrc1"
>;
-def V_WRITELANE_B32 : VOP2 <
- 0x00000002,
+defm V_WRITELANE_B32 : VOP2SI_3VI_m <
+ vop3 <0x002, 0x28a>,
+ "v_writelane_b32",
(outs VGPR_32:$vdst),
(ins SReg_32:$src0, SSrc_32:$vsrc1),
- "v_writelane_b32 $vdst, $src0, $vsrc1",
- []
+ "v_writelane_b32 $vdst, $src0, $vsrc1"
>;
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
VOP_F32_F32_F32
@@ -1568,30 +1568,33 @@ defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
}
} // End isCommutable = 1
+} // End let SubtargetPredicate = SICI
-defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32", VOP_I32_I32_I32,
- AMDGPUbfm>;
-defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
-defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32",
+defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
+ AMDGPUbfm
+>;
+defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
+ VOP_I32_I32_I32
+>;
+defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
VOP_I32_I32_I32
>;
-defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32",
+defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
VOP_I32_I32_I32
>;
-defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
+defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
+defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
-} // End let SubtargetPredicate = SICI
//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//
@@ -1656,9 +1659,6 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
VOP_I32_I32_I32_I32
>;
-// Only on SI
-defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
- VOP_F32_F32_F32_F32>;
defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
VOP_F32_F32_F32_F32, AMDGPUfmin3>;
@@ -1699,20 +1699,6 @@ defm V_DIV_FIXUP_F64 : VOP3Inst <
} // let SchedRW = [WriteDouble]
-defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
- VOP_I64_I64_I32, shl
->;
-
-// Only on SI
-defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
- VOP_I64_I64_I32, srl
->;
-
-// Only on SI
-defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
- VOP_I64_I64_I32, sra
->;
-
let SchedRW = [WriteDouble] in {
let isCommutable = 1 in {
@@ -1785,6 +1771,26 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
} // let SchedRW = [WriteDouble]
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
+ VOP_I64_I64_I32, shl
+>;
+
+defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
+ VOP_I64_I64_I32, srl
+>;
+
+defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
+ VOP_I64_I64_I32, sra
+>;
+
+defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
+ VOP_F32_F32_F32_F32>;
+
+} // End SubtargetPredicate = isSICI
+
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1943,14 +1949,14 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1 in {
def _SAVE : InstSI <
(outs),
- (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+ (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
>;
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+ (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
} // End UseNamedOperandTable = 1
@@ -1966,14 +1972,14 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let UseNamedOperandTable = 1 in {
def _SAVE : InstSI <
(outs),
- (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+ (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
>;
def _RESTORE : InstSI <
(outs vgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+ (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
} // End UseNamedOperandTable = 1
@@ -2728,16 +2734,12 @@ def : Pat <
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;
-let Predicates = [isSICI] in {
-
def : Pat <
(int_SI_tid),
- (V_MBCNT_HI_U32_B32_e32 0xffffffff,
+ (V_MBCNT_HI_U32_B32_e64 0xffffffff,
(V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
>;
-}
-
//===----------------------------------------------------------------------===//
// VOP3 Patterns
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h
index 7185271..667da4c 100644
--- a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h
@@ -50,6 +50,7 @@ public:
unsigned NumUserSGPRs;
std::map<unsigned, unsigned> LaneVGPRs;
unsigned LDSWaveSpillSize;
+ unsigned ScratchOffsetReg;
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
diff --git a/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
index f0e7ede..0a57a5b 100644
--- a/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
+++ b/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
@@ -84,28 +84,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
Entry->addLiveIn(ScratchOffsetPreloadReg);
- // Load the scratch pointer
- unsigned ScratchPtrReg =
- TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
- int ScratchPtrFI = -1;
-
- if (ScratchPtrReg != AMDGPU::NoRegister) {
- // Found an SGPR to use.
- MRI.setPhysRegUsed(ScratchPtrReg);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
- .addReg(ScratchPtrPreloadReg);
- } else {
- // No SGPR is available, we must spill.
- ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
- .addReg(ScratchPtrPreloadReg)
- .addFrameIndex(ScratchPtrFI);
- }
-
// Load the scratch offset.
unsigned ScratchOffsetReg =
TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
- int ScratchOffsetFI = ~0;
+ int ScratchOffsetFI = -1;
if (ScratchOffsetReg != AMDGPU::NoRegister) {
// Found an SGPR to use
@@ -117,7 +99,9 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
.addReg(ScratchOffsetPreloadReg)
- .addFrameIndex(ScratchOffsetFI);
+ .addFrameIndex(ScratchOffsetFI)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
}
@@ -125,22 +109,27 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
// add them to all the SI_SPILL_V* instructions.
RegScavenger RS;
- bool UseRegScavenger =
- (ScratchPtrReg == AMDGPU::NoRegister ||
- ScratchOffsetReg == AMDGPU::NoRegister);
+ unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
+ RS.addScavengingFrameIndex(ScratchRsrcFI);
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
- if (UseRegScavenger)
- RS.enterBasicBlock(&MBB);
+ // Add the scratch offset reg as a live-in so that the register scavenger
+ // doesn't re-use it.
+ if (!MBB.isLiveIn(ScratchOffsetReg) &&
+ ScratchOffsetReg != AMDGPU::NoRegister)
+ MBB.addLiveIn(ScratchOffsetReg);
+ RS.enterBasicBlock(&MBB);
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
+ RS.forward(I);
DebugLoc DL = MI.getDebugLoc();
switch(MI.getOpcode()) {
- default: break;;
+ default: break;
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
@@ -153,43 +142,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE:
- // Scratch Pointer
- if (ScratchPtrReg == AMDGPU::NoRegister) {
- ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
- ScratchPtrReg)
- .addFrameIndex(ScratchPtrFI)
- .addReg(AMDGPU::NoRegister)
- .addReg(AMDGPU::NoRegister);
- } else if (!MBB.isLiveIn(ScratchPtrReg)) {
- MBB.addLiveIn(ScratchPtrReg);
- }
+ // Scratch resource
+ unsigned ScratchRsrcReg =
+ RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
+
+ uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
+ .addImm(Rsrc & 0xffffffff)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
+ .addImm(Rsrc >> 32)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ // Scratch Offset
if (ScratchOffsetReg == AMDGPU::NoRegister) {
ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
ScratchOffsetReg)
.addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::NoRegister)
- .addReg(AMDGPU::NoRegister);
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
} else if (!MBB.isLiveIn(ScratchOffsetReg)) {
MBB.addLiveIn(ScratchOffsetReg);
}
- if (ScratchPtrReg == AMDGPU::NoRegister ||
+ if (ScratchRsrcReg == AMDGPU::NoRegister ||
ScratchOffsetReg == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF.getFunction()->getContext();
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
- ScratchPtrReg = AMDGPU::SGPR0;
+ ScratchRsrcReg = AMDGPU::SGPR0;
ScratchOffsetReg = AMDGPU::SGPR0;
}
- MI.getOperand(2).setReg(ScratchPtrReg);
+ MI.getOperand(2).setReg(ScratchRsrcReg);
+ MI.getOperand(2).setIsKill(true);
+ MI.getOperand(2).setIsUndef(false);
MI.getOperand(3).setReg(ScratchOffsetReg);
+ MI.getOperand(3).setIsUndef(false);
+ MI.getOperand(3).setIsKill(false);
+ MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
break;
}
- if (UseRegScavenger)
- RS.forward();
}
}
return true;
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
index f9feea4..0396bf3 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
@@ -23,7 +23,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
@@ -51,9 +50,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- return RC->getNumRegs();
+unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+
+ // FIXME: We should adjust the max number of waves based on LDS size.
+ unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
+ unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+
+ for (regclass_iterator I = regclass_begin(), E = regclass_end();
+ I != E; ++I) {
+
+ unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+ unsigned Limit;
+
+ if (isSGPRClass(*I)) {
+ Limit = SGPRLimit / NumSubRegs;
+ } else {
+ Limit = VGPRLimit / NumSubRegs;
+ }
+
+ const int *Sets = getRegClassPressureSets(*I);
+ assert(Sets);
+ for (unsigned i = 0; Sets[i] != -1; ++i) {
+ if (Sets[i] == (int)Idx)
+ return Limit;
+ }
+ }
+ return 256;
}
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
@@ -98,7 +120,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
unsigned Value,
- unsigned ScratchPtr,
+ unsigned ScratchRsrcReg,
unsigned ScratchOffset,
int64_t Offset,
RegScavenger *RS) const {
@@ -113,33 +135,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
bool RanOutOfSGPRs = false;
unsigned SOffset = ScratchOffset;
- unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
- if (RsrcReg == AMDGPU::NoRegister) {
- RanOutOfSGPRs = true;
- RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
- }
-
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned Size = NumSubRegs * 4;
- uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff; // Size
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
- getSubReg(RsrcReg, AMDGPU::sub0_sub1))
- .addReg(ScratchPtr)
- .addReg(RsrcReg, RegState::ImplicitDefine);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
- getSubReg(RsrcReg, AMDGPU::sub2))
- .addImm(Rsrc & 0xffffffff)
- .addReg(RsrcReg, RegState::ImplicitDefine);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
- getSubReg(RsrcReg, AMDGPU::sub3))
- .addImm(Rsrc >> 32)
- .addReg(RsrcReg, RegState::ImplicitDefine);
-
if (!isUInt<12>(Offset + Size)) {
SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
if (SOffset == AMDGPU::NoRegister) {
@@ -163,9 +161,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
- .addReg(RsrcReg, getKillRegState(IsKill))
+ .addReg(ScratchRsrcReg, getKillRegState(IsKill))
.addImm(Offset)
- .addReg(SOffset, getKillRegState(IsKill))
+ .addReg(SOffset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
@@ -235,9 +233,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
- if (isM0) {
+ if (isM0)
SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
- }
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
.addReg(Spill.VGPR)
@@ -262,7 +259,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V32_SAVE:
buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
@@ -274,7 +271,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V512_RESTORE: {
buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
@@ -289,7 +286,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
.addImm(Offset);
- FIOp.ChangeToRegister(TmpReg, false);
+ FIOp.ChangeToRegister(TmpReg, false, false, true);
}
}
}
@@ -446,6 +443,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
case SIRegisterInfo::TGID_Z:
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
+ if (MFI->getShaderType() != ShaderType::COMPUTE)
+ return MFI->ScratchOffsetReg;
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
case SIRegisterInfo::SCRATCH_PTR:
return AMDGPU::SGPR2_SGPR3;
@@ -475,3 +474,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
return AMDGPU::NoRegister;
}
+unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
+ switch(WaveCount) {
+ case 10: return 24;
+ case 9: return 28;
+ case 8: return 32;
+ case 7: return 36;
+ case 6: return 40;
+ case 5: return 48;
+ case 4: return 64;
+ case 3: return 84;
+ case 2: return 128;
+ default: return 256;
+ }
+}
+
+unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
+ switch(WaveCount) {
+ case 10: return 48;
+ case 9: return 56;
+ case 8: return 64;
+ case 7: return 72;
+ case 6: return 80;
+ case 5: return 96;
+ default: return 103;
+ }
+}
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
index d14212c..d908ffd 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
@@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
+#include "llvm/Support/Debug.h"
namespace llvm {
@@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const override;
- unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const override;
+ unsigned getRegPressureSetLimit(unsigned Idx) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
@@ -105,13 +105,21 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
unsigned getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const;
+ /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
+ /// concurrent waves.
+ unsigned getNumVGPRsAllowed(unsigned WaveCount) const;
+
+ /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
+ /// concurrent waves.
+ unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
private:
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, unsigned Value,
- unsigned ScratchPtr, unsigned ScratchOffset,
+ unsigned ScratchRsrcReg, unsigned ScratchOffset,
int64_t Offset, RegScavenger *RS) const;
};
diff --git a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
index f91d117..6a34106 100644
--- a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
@@ -10,6 +10,7 @@
//
#include "AMDGPU.h"
+#include "AMDGPUMCInstLower.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/Statistic.h"
@@ -206,13 +207,13 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
-
- // Op32 could be -1 here if we started with an instruction that had a
+ // getVOPe32 could be -1 here if we started with an instruction that had
// a 32-bit encoding and then commuted it to an instruction that did not.
- if (Op32 == -1)
+ if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
+ int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
+
if (TII->isVOPC(Op32)) {
unsigned DstReg = MI.getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
diff --git a/contrib/llvm/lib/Target/R600/VIInstructions.td b/contrib/llvm/lib/Target/R600/VIInstructions.td
index 07cfa29..24e66ce 100644
--- a/contrib/llvm/lib/Target/R600/VIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/VIInstructions.td
@@ -11,22 +11,6 @@
let SubtargetPredicate = isVI in {
-def V_LDEXP_F32 : VOP3InstVI <0x288, "v_ldexp_f32", VOP_F32_F32_I32,
- AMDGPUldexp
->;
-def V_BFM_B32 : VOP3InstVI <0x293, "v_bfm_b32", VOP_I32_I32_I32, AMDGPUbfm>;
-def V_BCNT_U32_B32 : VOP3InstVI <0x28b, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
-def V_MBCNT_LO_U32_B32 : VOP3InstVI <0x28c, "v_mbcnt_lo_u32_b32",
- VOP_I32_I32_I32
->;
-def V_MBCNT_HI_U32_B32 : VOP3InstVI <0x28d, "v_mbcnt_hi_u32_b32",
- VOP_I32_I32_I32
->;
-
-def V_CVT_PKRTZ_F16_F32 : VOP3InstVI <0x296, "v_cvt_pkrtz_f16_f32",
- VOP_I32_F32_F32, int_SI_packf16
->;
-
defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <
0x14, "buffer_load_dword", VGPR_32, i32, global_load
>;
@@ -37,22 +21,13 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
} // End SubtargetPredicate = isVI
-//===----------------------------------------------------------------------===//
-// VOP2 Patterns
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isVI] in {
-
-def : Pat <
- (int_SI_tid),
- (V_MBCNT_HI_U32_B32 0xffffffff,
- (V_MBCNT_LO_U32_B32 0xffffffff, 0))
->;
//===----------------------------------------------------------------------===//
// SMEM Patterns
//===----------------------------------------------------------------------===//
+let Predicates = [isVI] in {
+
// 1. Offset as 8bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
OpenPOWER on IntegriCloud