summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/R600
diff options
context:
space:
mode:
author: dim <dim@FreeBSD.org> 2014-05-12 18:45:56 +0000
committer: dim <dim@FreeBSD.org> 2014-05-12 18:45:56 +0000
commit: 2f29f665c9ba510d8c9d2fab818bfe63d74f0ba2 (patch)
tree: 35d5a050f878d9a554807408b0128b0347abf2c1 /contrib/llvm/lib/Target/R600
parent: d71c133cadfe28aaac02dddebb466b72bf312739 (diff)
download: FreeBSD-src-2f29f665c9ba510d8c9d2fab818bfe63d74f0ba2.zip
FreeBSD-src-2f29f665c9ba510d8c9d2fab818bfe63d74f0ba2.tar.gz
Upgrade our copy of llvm/clang to the 3.4.1 release. This release contains
mostly fixes for the following upstream bugs: http://llvm.org/PR16365 http://llvm.org/PR17473 http://llvm.org/PR18000 http://llvm.org/PR18068 http://llvm.org/PR18102 http://llvm.org/PR18165 http://llvm.org/PR18260 http://llvm.org/PR18290 http://llvm.org/PR18316 http://llvm.org/PR18460 http://llvm.org/PR18473 http://llvm.org/PR18515 http://llvm.org/PR18526 http://llvm.org/PR18600 http://llvm.org/PR18762 http://llvm.org/PR18773 http://llvm.org/PR18860 http://llvm.org/PR18994 http://llvm.org/PR19007 http://llvm.org/PR19010 http://llvm.org/PR19033 http://llvm.org/PR19059 http://llvm.org/PR19144 http://llvm.org/PR19326
MFC after: 2 weeks
Diffstat (limited to 'contrib/llvm/lib/Target/R600')
-rw-r--r-- contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp 2
-rw-r--r-- contrib/llvm/lib/Target/R600/AMDGPUInstructions.td 7
-rw-r--r-- contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp 2
-rw-r--r-- contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp 1
-rw-r--r-- contrib/llvm/lib/Target/R600/R600InstrInfo.cpp 8
-rw-r--r-- contrib/llvm/lib/Target/R600/R600Instructions.td 6
-rw-r--r-- contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp 2
-rw-r--r-- contrib/llvm/lib/Target/R600/SIInsertWaits.cpp 6
-rw-r--r-- contrib/llvm/lib/Target/R600/SIInstrInfo.td 64
-rw-r--r-- contrib/llvm/lib/Target/R600/SIInstructions.td 108
-rw-r--r-- contrib/llvm/lib/Target/R600/SIIntrinsics.td 16
-rw-r--r-- contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp 42
12 files changed, 210 insertions, 54 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index c4d75ff..1029f30 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -133,6 +133,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
index 3c5375d..7acd673 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
@@ -388,6 +388,11 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
// Bitfield extract patterns
+/*
+
+XXX: The BFE pattern is not working correctly because the XForm is not being
+applied.
+
def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
@@ -397,6 +402,8 @@ class BFEPattern <Instruction BFE> : Pat <
(BFE $x, $y, $z)
>;
+*/
+
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
(rotr i32:$src0, i32:$src1),
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 4a8e1b0..9b26af7 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -13,7 +13,6 @@
using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
HasSingleParameterDotFile = false;
- WeakDefDirective = 0;
//===------------------------------------------------------------------===//
HasSubsectionsViaSymbols = true;
HasMachoZeroFillDirective = false;
@@ -58,7 +57,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
WeakRefDirective = ".weakref\t";
- LinkOnceDirective = 0;
//===--- Dwarf Emission Directives -----------------------------------===//
HasLEB128 = true;
SupportsDebugInformation = true;
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
index ac3d8f6..2a8276b 100644
--- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -356,6 +356,7 @@ public:
DEBUG(dbgs() << CfCount << ":"; I->dump(););
FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++;
+ LastAlu.back() = 0;
continue;
}
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
index c0827fc..2eca6cf 100644
--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
@@ -716,7 +716,13 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
}
- // Get the last instruction in the block.
+ // Remove successive JUMP
+ while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) {
+ MachineBasicBlock::iterator PriorI = llvm::prior(I);
+ if (AllowModify)
+ I->removeFromParent();
+ I = PriorI;
+ }
MachineInstr *LastInst = I;
// If there is only one terminator instruction, process it.
diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td
index 0346e24..74c65da 100644
--- a/contrib/llvm/lib/Target/R600/R600Instructions.td
+++ b/contrib/llvm/lib/Target/R600/R600Instructions.td
@@ -1516,7 +1516,9 @@ let Predicates = [isEGorCayman] in {
i32:$src2))],
VecALU
>;
- def : BFEPattern <BFE_UINT_eg>;
+// XXX: This pattern is broken, disabling for now. See comment in
+// AMDGPUInstructions.td for more info.
+// def : BFEPattern <BFE_UINT_eg>;
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
defm : BFIPatterns <BFI_INT_eg>;
@@ -1636,7 +1638,6 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
let src2 = 0;
let src2_rel = 0;
- let Defs = [OQAP];
let usesCustomInserter = 1;
let LDS_1A = 1;
let DisableEncoding = "$dst";
@@ -1672,7 +1673,6 @@ class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
let BaseOp = name;
let usesCustomInserter = 1;
let DisableEncoding = "$dst";
- let Defs = [OQAP];
}
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
diff --git a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
index 3370c79..f0065ea 100644
--- a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -187,7 +187,7 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
DstRC == &AMDGPU::M0RegRegClass)
return false;
- SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg);
+ SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
}
diff --git a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
index 7ef662e..695ec40 100644
--- a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
+++ b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
@@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
Counters Result = ZeroCounts;
+ // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
+ // but we also want to wait for any other outstanding transfers before
+ // signalling other hardware blocks
+ if (MI.getOpcode() == AMDGPU::S_SENDMSG)
+ return LastIssued;
+
// For each register affected by this
// instruction increase the result sequence
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
index 4cd0daa..b7879c6 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
@@ -290,10 +290,10 @@ multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
: VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
- string revOp = opName> {
+ RegisterClass src0_rc, string revOp = opName> {
def _e32 : VOP2 <
- op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
+ op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
opName#"_e32 $dst, $src0, $src1", pattern
>, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
@@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
- let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
- mayLoad = 1 in {
-
- let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
- def _OFFEN : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr),
- asm#" $vdata, $srsrc + $vaddr", []>;
- }
-
- let offen = 0, idxen = 1, addr64 = 0 in {
- def _IDXEN : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset),
- asm#" $vdata, $srsrc[$vaddr] + $offset", []>;
- }
+ let lds = 0, mayLoad = 1 in {
+
+ let addr64 = 0 in {
+
+ let offen = 0, idxen = 0 in {
+ def _OFFSET : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+
+ let offen = 1, idxen = 0, offset = 0 in {
+ def _OFFEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
+ i1imm:$tfe),
+ asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+
+ let offen = 0, idxen = 1 in {
+ def _IDXEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_32:$vaddr,
+ i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr,
+ SSrc_32:$soffset, i1imm:$glc,
+ i1imm:$slc, i1imm:$tfe),
+ asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ }
+ }
- let offen = 0, idxen = 0, addr64 = 1 in {
- def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
- asm#" $vdata, $srsrc + $vaddr + $offset", []>;
- }
+ let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
+ def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
+ asm#" $vdata, $srsrc + $vaddr + $offset", []>;
+ }
}
}
diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td
index 76f05eb..2ca6a95 100644
--- a/contrib/llvm/lib/Target/R600/SIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/SIInstructions.td
@@ -22,6 +22,8 @@ def InterpSlot : Operand<i32> {
let PrintMethod = "printInterpSlot";
}
+def SendMsgImm : Operand<i32>;
+
def isSI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
@@ -826,17 +828,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
[]
>;
-} // End hasSideEffects
//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
-//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+
+let Uses = [EXEC] in {
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+ [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
+ > {
+ let DisableEncoding = "$m0";
+ }
+} // End Uses = [EXEC]
+
//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+} // End hasSideEffects
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
@@ -979,14 +989,16 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>;
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
+ "V_SUB_I32">;
let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
+ "V_SUBB_U32">;
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
@@ -1403,7 +1415,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
/* int_SI_export */
@@ -1658,16 +1670,30 @@ def : Pat <
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
>;
+/********** ================================ **********/
+/********** Floating point absolute/negative **********/
+/********** ================================ **********/
+
+// Manipulate the sign bit directly, as e.g. using the source negation modifier
+// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
+// breaking the piglit *s-floatBitsToInt-neg* tests
+
+// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
+// removing these patterns
+
+def : Pat <
+ (fneg (fabs f32:$src)),
+ (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+>;
+
def : Pat <
(fabs f32:$src),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+ (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
>;
def : Pat <
(fneg f32:$src),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
>;
/********** ================== **********/
@@ -1794,6 +1820,11 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;
+def : Pat <
+ (i32 (zext i1:$src0)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+>;
+
// 1. Offset as 8bit DWORD immediate
def : Pat <
(SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
@@ -1809,7 +1840,7 @@ def : Pat <
// 3. Offset in an 32Bit VGPR
def : Pat <
(SIload_constant i128:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
+ (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -1970,6 +2001,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
+// BUFFER_LOAD_DWORD*, addr64=0
+multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
+ MUBUF bothen> {
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm, 1, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
+ imm, 1, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+}
+
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
+defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
+ BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
+defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
+ BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//
@@ -2057,6 +2132,11 @@ def : Pat <
(EXTRACT_SUBREG $a, sub0)
>;
+def : Pat <
+ (i1 (trunc i32:$a)),
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+>;
+
// V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector
// case, the sgpr-copies pass will fix this to use the vector version.
def : Pat <
diff --git a/contrib/llvm/lib/Target/R600/SIIntrinsics.td b/contrib/llvm/lib/Target/R600/SIIntrinsics.td
index 7fcc964..00e32c0 100644
--- a/contrib/llvm/lib/Target/R600/SIIntrinsics.td
+++ b/contrib/llvm/lib/Target/R600/SIIntrinsics.td
@@ -38,6 +38,22 @@ let TargetPrefix = "SI", isTarget = 1 in {
llvm_i32_ty], // tfe(imm)
[]>;
+ // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
+ def int_SI_buffer_load_dword : Intrinsic <
+ [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
+ [llvm_anyint_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // vaddr(VGPR)
+ llvm_i32_ty, // soffset(SGPR)
+ llvm_i32_ty, // inst_offset(imm)
+ llvm_i32_ty, // offen(imm)
+ llvm_i32_ty, // idxen(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty], // tfe(imm)
+ [IntrReadArgMem]>;
+
+ def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_sample : Sample;
diff --git a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
index 958763d..ef867d3 100644
--- a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
@@ -109,6 +109,23 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm);
}
+static bool isDS(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DS_ADD_U32_RTN:
+ case AMDGPU::DS_SUB_U32_RTN:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B8:
+ case AMDGPU::DS_WRITE_B16:
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_I8:
+ case AMDGPU::DS_READ_U8:
+ case AMDGPU::DS_READ_I16:
+ case AMDGPU::DS_READ_U16:
+ return true;
+ }
+}
+
bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
MachineBasicBlock *To) {
@@ -145,7 +162,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+ if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
+ ShaderType::PIXEL ||
+ !shouldSkip(&MBB, &MBB.getParent()->back()))
return;
MachineBasicBlock::iterator Insert = &MI;
@@ -296,9 +315,11 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- // Kill is only allowed in pixel shaders
+ // Kill is only allowed in pixel / geometry shaders
assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
- ShaderType::PIXEL);
+ ShaderType::PIXEL ||
+ MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+ ShaderType::GEOMETRY);
// Clear this pixel from the exec mask if the operand is negative
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
@@ -431,6 +452,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = llvm::next(I);
MachineInstr &MI = *I;
+ if (isDS(MI.getOpcode())) {
+ NeedM0 = true;
+ NeedWQM = true;
+ }
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
@@ -491,14 +517,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
IndirectDst(MI);
break;
- case AMDGPU::DS_READ_B32:
- NeedWQM = true;
- // Fall through
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_ADD_U32_RTN:
- NeedM0 = true;
- break;
-
case AMDGPU::V_INTERP_P1_F32:
case AMDGPU::V_INTERP_P2_F32:
case AMDGPU::V_INTERP_MOV_F32:
@@ -517,7 +535,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::M0).addImm(0xffffffff);
}
- if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) {
+ if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
OpenPOWER on IntegriCloud