diff options
author | dim <dim@FreeBSD.org> | 2014-03-21 17:53:59 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2014-03-21 17:53:59 +0000 |
commit | 9cedb8bb69b89b0f0c529937247a6a80cabdbaec (patch) | |
tree | c978f0e9ec1ab92dc8123783f30b08a7fd1e2a39 /contrib/llvm/lib/Target/R600/R600Instructions.td | |
parent | 03fdc2934eb61c44c049a02b02aa974cfdd8a0eb (diff) | |
download | FreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.zip FreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.tar.gz |
MFC 261991:
Upgrade our copy of llvm/clang to 3.4 release. This version supports
all of the features in the current working draft of the upcoming C++
standard, provisionally named C++1y.
The code generator's performance is greatly increased, and the loop
auto-vectorizer is now enabled at -Os and -O2 in addition to -O3. The
PowerPC backend has made several major improvements to code generation
quality and compile time, and the X86, SPARC, ARM32, AArch64 and SystemZ
backends have all seen major feature work.
Release notes for llvm and clang can be found here:
<http://llvm.org/releases/3.4/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.4/tools/clang/docs/ReleaseNotes.html>
MFC 262121 (by emaste):
Update lldb for clang/llvm 3.4 import
This commit largely restores the lldb source to the upstream r196259
snapshot with the addition of threaded inferior support and a few bug
fixes.
Specific upstream lldb revisions restored include:
SVN git
181387 779e6ac
181703 7bef4e2
182099 b31044e
182650 f2dcf35
182683 0d91b80
183862 15c1774
183929 99447a6
184177 0b2934b
184948 4dc3761
184954 007e7bc
186990 eebd175
Sponsored by: DARPA, AFRL
MFC 262186 (by emaste):
Fix mismerge in r262121
A break statement was lost in the merge. The error had no functional
impact, but restore it to reduce the diff against upstream.
MFC 262303:
Pull in r197521 from upstream clang trunk (by rdivacky):
Use the integrated assembler by default on FreeBSD/ppc and ppc64.
Requested by: jhibbits
MFC 262611:
Pull in r196874 from upstream llvm trunk:
Fix a crash that occurs when PWD is invalid.
MCJIT needs to be able to run in hostile environments, even when PWD
is invalid. There's no need to crash MCJIT in this case.
The obvious fix is to simply leave MCContext's CompilationDir empty
when PWD can't be determined. This way, MCJIT clients,
and other clients that link with LLVM don't need a valid working directory.
If we do want to guarantee valid CompilationDir, that should be done
only for clients of getCompilationDir(). This is as simple as checking
for an empty string.
The only current use of getCompilationDir is EmitGenDwarfInfo, which
won't conceivably run with an invalid working dir. However, in the
purely hypothetical and untestable case that this happens, the
AT_comp_dir will be omitted from the compilation_unit DIE.
This should help fix assertions occurring with ports-mgmt/tinderbox,
which, when it is using jails, sometimes invalidates clang's current
working directory.
Reported by: decke
MFC 262809:
Pull in r203007 from upstream clang trunk:
Don't produce an alias between destructors with different calling conventions.
Fixes pr19007.
(Please note that is an LLVM PR identifier, not a FreeBSD one.)
This should fix Firefox and/or libxul crashes (due to problems with
regparm/stdcall calling conventions) on i386.
Reported by: multiple users on freebsd-current
PR: bin/187103
MFC 263048:
Repair recognition of "CC" as an alias for the C++ compiler, since it
was silently broken by upstream for a Windows-specific use-case.
Apparently some versions of CMake still rely on this archaic feature...
Reported by: rakuco
MFC 263049:
Garbage collect the old way of adding the libstdc++ include directories
in clang's InitHeaderSearch.cpp. This has been superseded by David
Chisnall's commit in r255321.
Moreover, if libc++ is used, the libstdc++ include directories should
not be in the search path at all. These directories are now only used
if you pass -stdlib=libstdc++.
Diffstat (limited to 'contrib/llvm/lib/Target/R600/R600Instructions.td')
-rw-r--r-- | contrib/llvm/lib/Target/R600/R600Instructions.td | 1715 |
1 files changed, 837 insertions, 878 deletions
diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td index 8f47523..0346e24 100644 --- a/contrib/llvm/lib/Target/R600/R600Instructions.td +++ b/contrib/llvm/lib/Target/R600/R600Instructions.td @@ -12,44 +12,7 @@ //===----------------------------------------------------------------------===// include "R600Intrinsics.td" - -class InstR600 <dag outs, dag ins, string asm, list<dag> pattern, - InstrItinClass itin> - : AMDGPUInst <outs, ins, asm, pattern> { - - field bits<64> Inst; - bit TransOnly = 0; - bit Trig = 0; - bit Op3 = 0; - bit isVector = 0; - bits<2> FlagOperandIdx = 0; - bit Op1 = 0; - bit Op2 = 0; - bit HasNativeOperands = 0; - bit VTXInst = 0; - bit TEXInst = 0; - - let Namespace = "AMDGPU"; - let OutOperandList = outs; - let InOperandList = ins; - let AsmString = asm; - let Pattern = pattern; - let Itinerary = itin; - - let TSFlags{0} = TransOnly; - let TSFlags{4} = Trig; - let TSFlags{5} = Op3; - - // Vector instructions are instructions that must fill all slots in an - // instruction group - let TSFlags{6} = isVector; - let TSFlags{8-7} = FlagOperandIdx; - let TSFlags{9} = HasNativeOperands; - let TSFlags{10} = Op1; - let TSFlags{11} = Op2; - let TSFlags{12} = VTXInst; - let TSFlags{13} = TEXInst; -} +include "R600InstrFormats.td" class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : InstR600 <outs, ins, asm, pattern, NullALU> { @@ -96,6 +59,12 @@ def UP : InstFlag <"printUpdatePred">; // Once we start using the packetizer in this backend we should have this // default to 0. 
def LAST : InstFlag<"printLast", 1>; +def RSel : Operand<i32> { + let PrintMethod = "printRSel"; +} +def CT: Operand<i32> { + let PrintMethod = "printCT"; +} def FRAMEri : Operand<iPTR> { let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index); @@ -106,237 +75,7 @@ def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>; def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>; -def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; - -class R600ALU_Word0 { - field bits<32> Word0; - - bits<11> src0; - bits<1> src0_neg; - bits<1> src0_rel; - bits<11> src1; - bits<1> src1_rel; - bits<1> src1_neg; - bits<3> index_mode = 0; - bits<2> pred_sel; - bits<1> last; - - bits<9> src0_sel = src0{8-0}; - bits<2> src0_chan = src0{10-9}; - bits<9> src1_sel = src1{8-0}; - bits<2> src1_chan = src1{10-9}; - - let Word0{8-0} = src0_sel; - let Word0{9} = src0_rel; - let Word0{11-10} = src0_chan; - let Word0{12} = src0_neg; - let Word0{21-13} = src1_sel; - let Word0{22} = src1_rel; - let Word0{24-23} = src1_chan; - let Word0{25} = src1_neg; - let Word0{28-26} = index_mode; - let Word0{30-29} = pred_sel; - let Word0{31} = last; -} - -class R600ALU_Word1 { - field bits<32> Word1; - - bits<11> dst; - bits<3> bank_swizzle; - bits<1> dst_rel; - bits<1> clamp; - - bits<7> dst_sel = dst{6-0}; - bits<2> dst_chan = dst{10-9}; - - let Word1{20-18} = bank_swizzle; - let Word1{27-21} = dst_sel; - let Word1{28} = dst_rel; - let Word1{30-29} = dst_chan; - let Word1{31} = clamp; -} - -class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{ - - bits<1> src0_abs; - bits<1> src1_abs; - bits<1> update_exec_mask; - bits<1> update_pred; - bits<1> write; - bits<2> omod; - - let Word1{0} = src0_abs; - let Word1{1} = src1_abs; - let Word1{2} = update_exec_mask; - let Word1{3} = 
update_pred; - let Word1{4} = write; - let Word1{6-5} = omod; - let Word1{17-7} = alu_inst; -} -class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{ - - bits<11> src2; - bits<1> src2_rel; - bits<1> src2_neg; - - bits<9> src2_sel = src2{8-0}; - bits<2> src2_chan = src2{10-9}; - - let Word1{8-0} = src2_sel; - let Word1{9} = src2_rel; - let Word1{11-10} = src2_chan; - let Word1{12} = src2_neg; - let Word1{17-13} = alu_inst; -} - -class VTX_WORD0 { - field bits<32> Word0; - bits<7> SRC_GPR; - bits<5> VC_INST; - bits<2> FETCH_TYPE; - bits<1> FETCH_WHOLE_QUAD; - bits<8> BUFFER_ID; - bits<1> SRC_REL; - bits<2> SRC_SEL_X; - bits<6> MEGA_FETCH_COUNT; - - let Word0{4-0} = VC_INST; - let Word0{6-5} = FETCH_TYPE; - let Word0{7} = FETCH_WHOLE_QUAD; - let Word0{15-8} = BUFFER_ID; - let Word0{22-16} = SRC_GPR; - let Word0{23} = SRC_REL; - let Word0{25-24} = SRC_SEL_X; - let Word0{31-26} = MEGA_FETCH_COUNT; -} - -class VTX_WORD1_GPR { - field bits<32> Word1; - bits<7> DST_GPR; - bits<1> DST_REL; - bits<3> DST_SEL_X; - bits<3> DST_SEL_Y; - bits<3> DST_SEL_Z; - bits<3> DST_SEL_W; - bits<1> USE_CONST_FIELDS; - bits<6> DATA_FORMAT; - bits<2> NUM_FORMAT_ALL; - bits<1> FORMAT_COMP_ALL; - bits<1> SRF_MODE_ALL; - - let Word1{6-0} = DST_GPR; - let Word1{7} = DST_REL; - let Word1{8} = 0; // Reserved - let Word1{11-9} = DST_SEL_X; - let Word1{14-12} = DST_SEL_Y; - let Word1{17-15} = DST_SEL_Z; - let Word1{20-18} = DST_SEL_W; - let Word1{21} = USE_CONST_FIELDS; - let Word1{27-22} = DATA_FORMAT; - let Word1{29-28} = NUM_FORMAT_ALL; - let Word1{30} = FORMAT_COMP_ALL; - let Word1{31} = SRF_MODE_ALL; -} - -class TEX_WORD0 { - field bits<32> Word0; - - bits<5> TEX_INST; - bits<2> INST_MOD; - bits<1> FETCH_WHOLE_QUAD; - bits<8> RESOURCE_ID; - bits<7> SRC_GPR; - bits<1> SRC_REL; - bits<1> ALT_CONST; - bits<2> RESOURCE_INDEX_MODE; - bits<2> SAMPLER_INDEX_MODE; - - let Word0{4-0} = TEX_INST; - let Word0{6-5} = INST_MOD; - let Word0{7} = FETCH_WHOLE_QUAD; - let Word0{15-8} = RESOURCE_ID; - let 
Word0{22-16} = SRC_GPR; - let Word0{23} = SRC_REL; - let Word0{24} = ALT_CONST; - let Word0{26-25} = RESOURCE_INDEX_MODE; - let Word0{28-27} = SAMPLER_INDEX_MODE; -} - -class TEX_WORD1 { - field bits<32> Word1; - - bits<7> DST_GPR; - bits<1> DST_REL; - bits<3> DST_SEL_X; - bits<3> DST_SEL_Y; - bits<3> DST_SEL_Z; - bits<3> DST_SEL_W; - bits<7> LOD_BIAS; - bits<1> COORD_TYPE_X; - bits<1> COORD_TYPE_Y; - bits<1> COORD_TYPE_Z; - bits<1> COORD_TYPE_W; - - let Word1{6-0} = DST_GPR; - let Word1{7} = DST_REL; - let Word1{11-9} = DST_SEL_X; - let Word1{14-12} = DST_SEL_Y; - let Word1{17-15} = DST_SEL_Z; - let Word1{20-18} = DST_SEL_W; - let Word1{27-21} = LOD_BIAS; - let Word1{28} = COORD_TYPE_X; - let Word1{29} = COORD_TYPE_Y; - let Word1{30} = COORD_TYPE_Z; - let Word1{31} = COORD_TYPE_W; -} - -class TEX_WORD2 { - field bits<32> Word2; - - bits<5> OFFSET_X; - bits<5> OFFSET_Y; - bits<5> OFFSET_Z; - bits<5> SAMPLER_ID; - bits<3> SRC_SEL_X; - bits<3> SRC_SEL_Y; - bits<3> SRC_SEL_Z; - bits<3> SRC_SEL_W; - - let Word2{4-0} = OFFSET_X; - let Word2{9-5} = OFFSET_Y; - let Word2{14-10} = OFFSET_Z; - let Word2{19-15} = SAMPLER_ID; - let Word2{22-20} = SRC_SEL_X; - let Word2{25-23} = SRC_SEL_Y; - let Word2{28-26} = SRC_SEL_Z; - let Word2{31-29} = SRC_SEL_W; -} - -/* -XXX: R600 subtarget uses a slightly different encoding than the other -subtargets. We currently handle this in R600MCCodeEmitter, but we may -want to use these instruction classes in the future. 
- -class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 { - - bits<1> fog_merge; - bits<10> alu_inst; - - let Inst{37} = fog_merge; - let Inst{39-38} = omod; - let Inst{49-40} = alu_inst; -} - -class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { - - bits<11> alu_inst; - - let Inst{38-37} = omod; - let Inst{49-39} = alu_inst; -} -*/ def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), (ops PRED_SEL_OFF)>; @@ -358,7 +97,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, BANK_SWIZZLE:$bank_swizzle), !strconcat(" ", opName, - "$last$clamp $dst$write$dst_rel$omod, " + "$clamp $last $dst$write$dst_rel$omod, " "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " "$pred_sel $bank_swizzle"), pattern, @@ -374,7 +113,9 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, let update_pred = 0; let HasNativeOperands = 1; let Op1 = 1; + let ALUInst = 1; let DisableEncoding = "$literal"; + let UseNamedOperandTable = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -386,7 +127,7 @@ class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node, [(set R600_Reg32:$dst, (node R600_Reg32:$src0))] >; -// If you add our change the operands for R600_2OP instructions, you must +// If you add or change the operands for R600_2OP instructions, you must // also update the R600Op2OperandIndex::ROI enum in R600Defines.h, // R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx(). 
class R600_2OP <bits<11> inst, string opName, list<dag> pattern, @@ -399,7 +140,7 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, BANK_SWIZZLE:$bank_swizzle), !strconcat(" ", opName, - "$last$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " + "$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, " "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " "$pred_sel $bank_swizzle"), @@ -410,7 +151,9 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, let HasNativeOperands = 1; let Op2 = 1; + let ALUInst = 1; let DisableEncoding = "$literal"; + let UseNamedOperandTable = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -436,7 +179,7 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, BANK_SWIZZLE:$bank_swizzle), - !strconcat(" ", opName, "$last$clamp $dst$dst_rel, " + !strconcat(" ", opName, "$clamp $last $dst$dst_rel, " "$src0_neg$src0$src0_rel, " "$src1_neg$src1$src1_rel, " "$src2_neg$src2$src2_rel, " @@ -450,6 +193,8 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, let HasNativeOperands = 1; let DisableEncoding = "$literal"; let Op3 = 1; + let UseNamedOperandTable = 1; + let ALUInst = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -463,38 +208,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, pattern, itin>; -class R600_TEX <bits<11> inst, string opName, list<dag> pattern, - InstrItinClass itin = AnyALU> : - InstR600 <(outs R600_Reg128:$DST_GPR), - (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget), - !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"), - pattern, - itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { - let Inst{31-0} = Word0; - let Inst{63-32} = Word1; 
- - let TEX_INST = inst{4-0}; - let SRC_REL = 0; - let DST_REL = 0; - let DST_SEL_X = 0; - let DST_SEL_Y = 1; - let DST_SEL_Z = 2; - let DST_SEL_W = 3; - let LOD_BIAS = 0; - - let INST_MOD = 0; - let FETCH_WHOLE_QUAD = 0; - let ALT_CONST = 0; - let SAMPLER_INDEX_MODE = 0; - let RESOURCE_INDEX_MODE = 0; - - let COORD_TYPE_X = 0; - let COORD_TYPE_Y = 0; - let COORD_TYPE_Z = 0; - let COORD_TYPE_W = 0; - - let TEXInst = 1; - } + } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 @@ -515,7 +229,7 @@ def TEX_RECT : PatLeaf< def TEX_ARRAY : PatLeaf< (imm), [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 9 || TType == 10 || TType == 15 || TType == 16; + return TType == 9 || TType == 10 || TType == 16; }] >; @@ -526,76 +240,115 @@ def TEX_SHADOW_ARRAY : PatLeaf< }] >; -class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, - dag ins, string asm, list<dag> pattern> : - InstR600ISA <outs, ins, asm, pattern> { - bits<7> RW_GPR; - bits<7> INDEX_GPR; - - bits<2> RIM; - bits<2> TYPE; - bits<1> RW_REL; - bits<2> ELEM_SIZE; - - bits<12> ARRAY_SIZE; - bits<4> COMP_MASK; - bits<4> BURST_COUNT; - bits<1> VPM; - bits<1> eop; - bits<1> MARK; - bits<1> BARRIER; - - // CF_ALLOC_EXPORT_WORD0_RAT - let Inst{3-0} = rat_id; - let Inst{9-4} = rat_inst; - let Inst{10} = 0; // Reserved - let Inst{12-11} = RIM; - let Inst{14-13} = TYPE; - let Inst{21-15} = RW_GPR; - let Inst{22} = RW_REL; - let Inst{29-23} = INDEX_GPR; - let Inst{31-30} = ELEM_SIZE; - - // CF_ALLOC_EXPORT_WORD1_BUF - let Inst{43-32} = ARRAY_SIZE; - let Inst{47-44} = COMP_MASK; - let Inst{51-48} = BURST_COUNT; - let Inst{52} = VPM; - let Inst{53} = eop; - let Inst{61-54} = cf_inst; - let Inst{62} = MARK; - let Inst{63} = BARRIER; +def TEX_MSAA : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 14; + }] +>; + +def TEX_ARRAY_MSAA : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 15; + }] +>; + +class EG_CF_RAT 
<bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask, + dag outs, dag ins, string asm, list<dag> pattern> : + InstR600ISA <outs, ins, asm, pattern>, + CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF { + + let rat_id = ratid; + let rat_inst = ratinst; + let rim = 0; + // XXX: Have a separate instruction for non-indexed writes. + let type = 1; + let rw_rel = 0; + let elem_size = 0; + + let array_size = 0; + let comp_mask = mask; + let burst_count = 0; + let vpm = 0; + let cf_inst = cfinst; + let mark = 0; + let barrier = 1; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + let IsExport = 1; + +} + +class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern> + : InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>, + VTX_WORD1_GPR { + + // Static fields + let DST_REL = 0; + // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, + // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, + // however, based on my testing if USE_CONST_FIELDS is set, then all + // these fields need to be set to 0. 
+ let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 1; + let FORMAT_COMP_ALL = 0; + let SRF_MODE_ALL = 0; + + let Inst{63-32} = Word1; + // LLVM can only encode 64-bit instructions, so these fields are manually + // encoded in R600CodeEmitter + // + // bits<16> OFFSET; + // bits<2> ENDIAN_SWAP = 0; + // bits<1> CONST_BUF_NO_STRIDE = 0; + // bits<1> MEGA_FETCH = 0; + // bits<1> ALT_CONST = 0; + // bits<2> BUFFER_INDEX_MODE = 0; + + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding + // is done in R600CodeEmitter + // + // Inst{79-64} = OFFSET; + // Inst{81-80} = ENDIAN_SWAP; + // Inst{82} = CONST_BUF_NO_STRIDE; + // Inst{83} = MEGA_FETCH; + // Inst{84} = ALT_CONST; + // Inst{86-85} = BUFFER_INDEX_MODE; + // Inst{95-86} = 0; Reserved + + // VTX_WORD3 (Padding) + // + // Inst{127-96} = 0; + + let VTXInst = 1; } class LoadParamFrag <PatFrag load_type> : PatFrag < (ops node:$ptr), (load_type node:$ptr), - [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }] + [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }] >; def load_param : LoadParamFrag<load>; -def load_param_zexti8 : LoadParamFrag<zextloadi8>; -def load_param_zexti16 : LoadParamFrag<zextloadi16>; - -def isR600 : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">; -def isR700 : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&" - "Subtarget.device()->getDeviceFlag()" - ">= OCL_DEVICE_RV710">; +def load_param_exti8 : LoadParamFrag<az_extloadi8>; +def load_param_exti16 : LoadParamFrag<az_extloadi16>; + +def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">; +def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">; def isEG : Predicate< - "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && " - "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && " - "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">; + "Subtarget.getGeneration() >= 
AMDGPUSubtarget::EVERGREEN && " + "Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && " + "!Subtarget.hasCaymanISA()">; -def isCayman : Predicate<"Subtarget.device()" - "->getDeviceFlag() == OCL_DEVICE_CAYMAN">; -def isEGorCayman : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD5XXX" - "|| Subtarget.device()->getGeneration() ==" - "AMDGPUDeviceInfo::HD6XXX">; +def isCayman : Predicate<"Subtarget.hasCaymanISA()">; +def isEGorCayman : Predicate<"Subtarget.getGeneration() == " + "AMDGPUSubtarget::EVERGREEN" + "|| Subtarget.getGeneration() ==" + "AMDGPUSubtarget::NORTHERN_ISLANDS">; def isR600toCayman : Predicate< - "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; + "Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">; //===----------------------------------------------------------------------===// // R600 SDNodes @@ -603,13 +356,13 @@ def isR600toCayman : Predicate< def INTERP_PAIR_XY : AMDGPUShaderInst < (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1), - (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), + (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2), "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1", []>; def INTERP_PAIR_ZW : AMDGPUShaderInst < (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1), - (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), + (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2), "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1", []>; @@ -618,6 +371,44 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", [SDNPVariadic] >; +def DOT4 : SDNode<"AMDGPUISD::DOT4", + SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>, + SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>, + SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>, + [] +>; + +def COS_HW : SDNode<"AMDGPUISD::COS_HW", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]> +>; + +def SIN_HW : SDNode<"AMDGPUISD::SIN_HW", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]> 
+>; + +def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>; + +def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>; + +multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> { +def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, + (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw), + (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz), + (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z), + (i32 imm:$DST_SEL_W), + (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID), + (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z), + (i32 imm:$COORD_TYPE_W)), + (inst R600_Reg128:$SRC_GPR, + imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw, + imm:$offsetx, imm:$offsety, imm:$offsetz, + imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z, + imm:$DST_SEL_W, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, + imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z, + imm:$COORD_TYPE_W)>; +} + //===----------------------------------------------------------------------===// // Interpolation Instructions //===----------------------------------------------------------------------===// @@ -626,7 +417,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), "INTERP_LOAD $src0 : $dst", - []>; + [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; @@ -753,13 +544,14 @@ let usesCustomInserter = 1, isNotDuplicable = 1 in { class ExportSwzInst : InstR600ISA<( outs), (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, - i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst, + RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst, i32imm:$eop), - !strconcat("EXPORT", " $gpr"), + !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"), []>, ExportWord0, ExportSwzWord1 { let elem_size = 3; let Inst{31-0} = Word0; let Inst{63-32} = Word1; + let IsExport = 1; } } // End 
usesCustomInserter = 1 @@ -773,47 +565,13 @@ class ExportBufInst : InstR600ISA<( let elem_size = 0; let Inst{31-0} = Word0; let Inst{63-32} = Word1; + let IsExport = 1; } //===----------------------------------------------------------------------===// // Control Flow Instructions //===----------------------------------------------------------------------===// -class CF_ALU_WORD0 { - field bits<32> Word0; - - bits<22> ADDR; - bits<4> KCACHE_BANK0; - bits<4> KCACHE_BANK1; - bits<2> KCACHE_MODE0; - - let Word0{21-0} = ADDR; - let Word0{25-22} = KCACHE_BANK0; - let Word0{29-26} = KCACHE_BANK1; - let Word0{31-30} = KCACHE_MODE0; -} - -class CF_ALU_WORD1 { - field bits<32> Word1; - - bits<2> KCACHE_MODE1; - bits<8> KCACHE_ADDR0; - bits<8> KCACHE_ADDR1; - bits<7> COUNT; - bits<1> ALT_CONST; - bits<4> CF_INST; - bits<1> WHOLE_QUAD_MODE; - bits<1> BARRIER; - - let Word1{1-0} = KCACHE_MODE1; - let Word1{9-2} = KCACHE_ADDR0; - let Word1{17-10} = KCACHE_ADDR1; - let Word1{24-18} = COUNT; - let Word1{25} = ALT_CONST; - let Word1{29-26} = CF_INST; - let Word1{30} = WHOLE_QUAD_MODE; - let Word1{31} = BARRIER; -} def KCACHE : InstFlag<"printKCache">; @@ -821,7 +579,7 @@ class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs), (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1, i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, -i32imm:$COUNT), +i32imm:$COUNT, i32imm:$Enabled), !strconcat(OpName, " $COUNT, @$ADDR, " "KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"), [] >, CF_ALU_WORD0, CF_ALU_WORD1 { @@ -831,6 +589,7 @@ i32imm:$COUNT), let ALT_CONST = 0; let WHOLE_QUAD_MODE = 0; let BARRIER = 1; + let UseNamedOperandTable = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -844,45 +603,19 @@ class CF_WORD0_R600 { let Word0 = ADDR; } -class CF_WORD1_R600 { - field bits<32> Word1; - - bits<3> POP_COUNT; - bits<5> CF_CONST; - bits<2> COND; - bits<3> COUNT; - bits<6> CALL_COUNT; - bits<1> COUNT_3; - bits<1> END_OF_PROGRAM; - 
bits<1> VALID_PIXEL_MODE; - bits<7> CF_INST; - bits<1> WHOLE_QUAD_MODE; - bits<1> BARRIER; - - let Word1{2-0} = POP_COUNT; - let Word1{7-3} = CF_CONST; - let Word1{9-8} = COND; - let Word1{12-10} = COUNT; - let Word1{18-13} = CALL_COUNT; - let Word1{19} = COUNT_3; - let Word1{21} = END_OF_PROGRAM; - let Word1{22} = VALID_PIXEL_MODE; - let Word1{29-23} = CF_INST; - let Word1{30} = WHOLE_QUAD_MODE; - let Word1{31} = BARRIER; -} - class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { field bits<64> Inst; + bits<4> CNT; let CF_INST = inst; let BARRIER = 1; let CF_CONST = 0; let VALID_PIXEL_MODE = 0; let COND = 0; + let COUNT = CNT{2-0}; let CALL_COUNT = 0; - let COUNT_3 = 0; + let COUNT_3 = CNT{3}; let END_OF_PROGRAM = 0; let WHOLE_QUAD_MODE = 0; @@ -890,38 +623,6 @@ ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { let Inst{63-32} = Word1; } -class CF_WORD0_EG { - field bits<32> Word0; - - bits<24> ADDR; - bits<3> JUMPTABLE_SEL; - - let Word0{23-0} = ADDR; - let Word0{26-24} = JUMPTABLE_SEL; -} - -class CF_WORD1_EG { - field bits<32> Word1; - - bits<3> POP_COUNT; - bits<5> CF_CONST; - bits<2> COND; - bits<6> COUNT; - bits<1> VALID_PIXEL_MODE; - bits<1> END_OF_PROGRAM; - bits<8> CF_INST; - bits<1> BARRIER; - - let Word1{2-0} = POP_COUNT; - let Word1{7-3} = CF_CONST; - let Word1{9-8} = COND; - let Word1{15-10} = COUNT; - let Word1{20} = VALID_PIXEL_MODE; - let Word1{21} = END_OF_PROGRAM; - let Word1{29-22} = CF_INST; - let Word1{31} = BARRIER; -} - class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { field bits<64> Inst; @@ -940,6 +641,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { def CF_ALU : ALU_CLAUSE<8, "ALU">; def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; +def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">; def FETCH_CLAUSE : AMDGPUInst <(outs), (ins i32imm:$addr), "Fetch clause 
starting at $addr:", [] > { @@ -987,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. def SETE : R600_2OP < 0x08, "SETE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))] >; def SGT : R600_2OP < 0x09, "SETGT", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))] >; def SGE : R600_2OP < 0xA, "SETGE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))] >; def SNE : R600_2OP < 0xB, "SETNE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))] >; def SETE_DX10 : R600_2OP < 0xC, "SETE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))] >; def SETGT_DX10 : R600_2OP < 0xD, "SETGT_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))] >; def SETGE_DX10 : R600_2OP < 0xE, "SETGE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))] >; def SETNE_DX10 : R600_2OP < 0xF, "SETNE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))] >; def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; @@ -1120,104 +822,86 @@ def CNDE_INT : R600_3OP < def CNDGE_INT : R600_3OP < 0x1E, "CNDGE_INT", - [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, 
COND_SGE))] >; def CNDGT_INT : R600_3OP < 0x1D, "CNDGT_INT", - [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))] >; //===----------------------------------------------------------------------===// // Texture instructions //===----------------------------------------------------------------------===// -def TEX_LD : R600_TEX < - 0x03, "TEX_LD", - [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR, - imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID, - imm:$SAMPLER_ID, imm:$textureTarget))] -> { -let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z," - "$RESOURCE_ID, $SAMPLER_ID, $textureTarget"; -let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X, - i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, - i32imm:$textureTarget); -} - -def TEX_GET_TEXTURE_RESINFO : R600_TEX < - 0x04, "TEX_GET_TEXTURE_RESINFO", - [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; - -def TEX_GET_GRADIENTS_H : R600_TEX < - 0x07, "TEX_GET_GRADIENTS_H", - [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; - -def TEX_GET_GRADIENTS_V : R600_TEX < - 0x08, "TEX_GET_GRADIENTS_V", - [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; - -def TEX_SET_GRADIENTS_H : R600_TEX < - 0x0B, "TEX_SET_GRADIENTS_H", - [] ->; - -def TEX_SET_GRADIENTS_V : R600_TEX < - 0x0C, "TEX_SET_GRADIENTS_V", - [] ->; +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { -def TEX_SAMPLE : R600_TEX < - 0x10, "TEX_SAMPLE", - [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; +class R600_TEX <bits<11> inst, string opName> : + InstR600 <(outs R600_Reg128:$DST_GPR), + (ins R600_Reg128:$SRC_GPR, + 
RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw, + i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz, + RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W, + i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, + CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z, + CT:$COORD_TYPE_W), + !strconcat(opName, + " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, " + "$SRC_GPR.$srcx$srcy$srcz$srcw " + "RID:$RESOURCE_ID SID:$SAMPLER_ID " + "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"), + [], + NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; -def TEX_SAMPLE_C : R600_TEX < - 0x18, "TEX_SAMPLE_C", - [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] ->; + let TEX_INST = inst{4-0}; + let SRC_REL = 0; + let DST_REL = 0; + let LOD_BIAS = 0; -def TEX_SAMPLE_L : R600_TEX < - 0x11, "TEX_SAMPLE_L", - [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; + let INST_MOD = 0; + let FETCH_WHOLE_QUAD = 0; + let ALT_CONST = 0; + let SAMPLER_INDEX_MODE = 0; + let RESOURCE_INDEX_MODE = 0; -def TEX_SAMPLE_C_L : R600_TEX < - 0x19, "TEX_SAMPLE_C_L", - [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] ->; + let TEXInst = 1; +} -def TEX_SAMPLE_LB : R600_TEX < - 0x12, "TEX_SAMPLE_LB", - [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; +} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 -def TEX_SAMPLE_C_LB : R600_TEX < - 0x1A, "TEX_SAMPLE_C_LB", - [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] ->; -def TEX_SAMPLE_G : R600_TEX < - 0x14, "TEX_SAMPLE_G", - [] ->; -def TEX_SAMPLE_C_G : R600_TEX < - 0x1C, "TEX_SAMPLE_C_G", - [] ->; +def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">; +def 
TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">; +def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">; +def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">; +def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">; +def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">; +def TEX_LD : R600_TEX <0x03, "TEX_LD">; +def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> { + let INST_MOD = 1; +} +def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">; +def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">; +def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">; +def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">; +def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">; +def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">; +def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">; + +defm : TexPattern<0, TEX_SAMPLE>; +defm : TexPattern<1, TEX_SAMPLE_C>; +defm : TexPattern<2, TEX_SAMPLE_L>; +defm : TexPattern<3, TEX_SAMPLE_C_L>; +defm : TexPattern<4, TEX_SAMPLE_LB>; +defm : TexPattern<5, TEX_SAMPLE_C_LB>; +defm : TexPattern<6, TEX_LD, v4i32>; +defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>; +defm : TexPattern<8, TEX_GET_GRADIENTS_H>; +defm : TexPattern<9, TEX_GET_GRADIENTS_V>; +defm : TexPattern<10, TEX_LDPTR, v4i32>; //===----------------------------------------------------------------------===// // Helper classes for common instructions @@ -1240,41 +924,82 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < class CNDE_Common <bits<5> inst> : R600_3OP < inst, "CNDE", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))] >; class CNDGT_Common <bits<5> inst> : R600_3OP < inst, "CNDGT", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))] ->; + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))] +> { + let Itinerary = VecALU; +} class CNDGE_Common 
<bits<5> inst> : R600_3OP < inst, "CNDGE", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))] ->; - -multiclass DOT4_Common <bits<11> inst> { + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))] +> { + let Itinerary = VecALU; +} + + +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins +// Slot X + UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X, + OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X, + R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X, + R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X, + R600_Pred:$pred_sel_X, +// Slot Y + UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y, + OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y, + R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y, + R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y, + R600_Pred:$pred_sel_Y, +// Slot Z + UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z, + OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z, + R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z, + R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z, + R600_Pred:$pred_sel_Z, +// Slot W + UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W, + OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W, + R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W, + R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W, + R600_Pred:$pred_sel_W, + LITERAL:$literal0, LITERAL:$literal1), + "", + pattern, + AnyALU> { - def _pseudo : R600_REDUCTION <inst, - (ins R600_Reg128:$src0, R600_Reg128:$src1), - "DOT4 $dst $src0, $src1", - [(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, 
v4f32:$src1))] - >; + let UseNamedOperandTable = 1; - def _real : R600_2OP <inst, "DOT4", []>; } +} + +def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4 + R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X, + R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y, + R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z, + R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>; + + +class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>; + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { multiclass CUBE_Common <bits<11> inst> { def _pseudo : InstR600 < (outs R600_Reg128:$dst), - (ins R600_Reg128:$src), - "CUBE $dst $src", - [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))], + (ins R600_Reg128:$src0), + "CUBE $dst $src0", + [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))], VecALU > { let isPseudo = 1; + let UseNamedOperandTable = 1; } def _real : R600_2OP <inst, "CUBE", []>; @@ -1284,35 +1009,30 @@ multiclass CUBE_Common <bits<11> inst> { class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper < inst, "EXP_IEEE", fexp2 > { - let TransOnly = 1; let Itinerary = TransALU; } class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper < inst, "FLT_TO_INT", fp_to_sint > { - let TransOnly = 1; let Itinerary = TransALU; } class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < inst, "INT_TO_FLT", sint_to_fp > { - let TransOnly = 1; let Itinerary = TransALU; } class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper < inst, "FLT_TO_UINT", fp_to_uint > { - let TransOnly = 1; let Itinerary = TransALU; } class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < inst, "UINT_TO_FLT", uint_to_fp > { - let TransOnly = 1; let Itinerary = TransALU; } @@ -1323,7 +1043,6 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper < inst, "LOG_IEEE", flog2 > { - let TransOnly = 1; let Itinerary = TransALU; } @@ -1333,75 +1052,68 @@ class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>; class MULHI_INT_Common <bits<11> inst> 
: R600_2OP_Helper < inst, "MULHI_INT", mulhs > { - let TransOnly = 1; let Itinerary = TransALU; } class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULHI", mulhu > { - let TransOnly = 1; let Itinerary = TransALU; } class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULLO_INT", mul > { - let TransOnly = 1; let Itinerary = TransALU; } class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "RECIP_CLAMPED", [] > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < inst, "RECIP_UINT", AMDGPUurecip > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIPSQRT_IEEE", [] > { - let TransOnly = 1; let Itinerary = TransALU; } class SIN_Common <bits<11> inst> : R600_1OP < - inst, "SIN", []>{ + inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{ let Trig = 1; - let TransOnly = 1; let Itinerary = TransALU; } class COS_Common <bits<11> inst> : R600_1OP < - inst, "COS", []> { + inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> { let Trig = 1; - let TransOnly = 1; let Itinerary = TransALU; } +def CLAMP_R600 : CLAMP <R600_Reg32>; +def FABS_R600 : FABS<R600_Reg32>; +def FNEG_R600 : FNEG<R600_Reg32>; + //===----------------------------------------------------------------------===// // Helper patterns for complex intrinsics //===----------------------------------------------------------------------===// @@ -1424,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 
log_clamped, InstR600 exp_ie (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x)) >; +// FROUND pattern +class FROUNDPat<Instruction CNDGE> : Pat < + (AMDGPUround f32:$x), + (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)) +>; + + //===----------------------------------------------------------------------===// // R600 / R700 Instructions //===----------------------------------------------------------------------===// @@ -1436,7 +1155,7 @@ let Predicates = [isR600] in { def CNDE_r600 : CNDE_Common<0x18>; def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; - defm DOT4_r600 : DOT4_Common<0x50>; + def DOT4_r600 : DOT4_Common<0x50>; defm CUBE_r600 : CUBE_Common<0x52>; def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; @@ -1465,11 +1184,12 @@ let Predicates = [isR600] in { def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; + def : FROUNDPat <CNDGE_r600>; def R600_ExportSwz : ExportSwzInst { let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; - let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{22} = 0; // VALID_PIXEL_MODE let Word1{30-23} = inst; let Word1{31} = 1; // BARRIER } @@ -1478,58 +1198,58 @@ let Predicates = [isR600] in { def R600_ExportBuf : ExportBufInst { let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; - let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{22} = 0; // VALID_PIXEL_MODE let Word1{30-23} = inst; let Word1{31} = 1; // BARRIER } defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; - def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT), - "TEX $COUNT @$ADDR"> { + def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT), + "TEX $CNT @$ADDR"> { let POP_COUNT = 0; } - def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT), - "VTX $COUNT @$ADDR"> { + def CF_VC_R600 
: CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT), + "VTX $CNT @$ADDR"> { let POP_COUNT = 0; } def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> { let POP_COUNT = 0; - let COUNT = 0; + let CNT = 0; } def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { let POP_COUNT = 0; - let COUNT = 0; + let CNT = 0; } def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> { let POP_COUNT = 0; - let COUNT = 0; + let CNT = 0; } def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> { let POP_COUNT = 0; - let COUNT = 0; + let CNT = 0; } def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let CNT = 0; } def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let CNT = 0; } def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> { let ADDR = 0; - let COUNT = 0; + let CNT = 0; let POP_COUNT = 0; } def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let CNT = 0; } def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> { - let COUNT = 0; + let CNT = 0; let POP_COUNT = 0; let ADDR = 0; let END_OF_PROGRAM = 1; @@ -1537,18 +1257,6 @@ let Predicates = [isR600] in { } -// Helper pattern for normalizing inputs to triginomic instructions for R700+ -// cards. 
-class COS_PAT <InstR600 trig> : Pat< - (fcos f32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) ->; - -class SIN_PAT <InstR600 trig> : Pat< - (fsin f32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) ->; - //===----------------------------------------------------------------------===// // R700 Only instructions //===----------------------------------------------------------------------===// @@ -1556,12 +1264,35 @@ class SIN_PAT <InstR600 trig> : Pat< let Predicates = [isR700] in { def SIN_r700 : SIN_Common<0x6E>; def COS_r700 : COS_Common<0x6F>; +} + +//===----------------------------------------------------------------------===// +// Evergreen / Cayman store instructions +//===----------------------------------------------------------------------===// - // R700 normalizes inputs to SIN/COS the same as EG - def : SIN_PAT <SIN_r700>; - def : COS_PAT <COS_r700>; +let Predicates = [isEGorCayman] in { + +class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins, + string name, list<dag> pattern> + : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins, + "MEM_RAT_CACHELESS "#name, pattern>; + +class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name, + list<dag> pattern> + : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins, + "MEM_RAT "#name, pattern>; + +def RAT_MSKOR : CF_MEM_RAT <0x11, 0, + (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), + "MSKOR $rw_gpr.XW, $index_gpr", + [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)] +> { + let eop = 0; } +} // End Predicates = [isEGorCayman] + + //===----------------------------------------------------------------------===// // Evergreen Only instructions //===----------------------------------------------------------------------===// @@ -1585,9 +1316,179 @@ def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; -def : SIN_PAT <SIN_eg>; -def : COS_PAT <COS_eg>; def : Pat<(fsqrt 
f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; + +//===----------------------------------------------------------------------===// +// Memory read/write instructions +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1 in { + +// 32-bit store +def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1, + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), + "STORE_RAW $rw_gpr, $index_gpr, $eop", + [(global_store i32:$rw_gpr, i32:$index_gpr)] +>; + +// 64-bit store +def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3, + (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), + "STORE_RAW $rw_gpr.XY, $index_gpr, $eop", + [(global_store v2i32:$rw_gpr, i32:$index_gpr)] +>; + +//128-bit store +def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf, + (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), + "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop", + [(global_store v4i32:$rw_gpr, i32:$index_gpr)] +>; + +} // End usesCustomInserter = 1 + +class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> + : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> { + + // Static fields + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = buffer_id; + let SRC_REL = 0; + // XXX: We can infer this field based on the SRC_GPR. This would allow us + // to store vertex addresses in any channel, not just X. 
+ let SRC_SEL_X = 0; + + let Inst{31-0} = Word0; +} + +class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 1; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 1; // FMT_8 +} + +class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + let MEGA_FETCH_COUNT = 2; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 5; // FMT_16 + +} + +class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 4; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 0xD; // COLOR_32 + + // This is not really necessary, but there were some GPU hangs that appeared + // to be caused by ALU instructions in the next instruction group that wrote + // to the $src_gpr registers of the VTX_READ. + // e.g. + // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24 + // %T2_X<def> = MOV %ZERO + //Adding this constraint prevents this from happening. 
+ let Constraints = "$src_gpr.ptr = $dst_gpr"; +} + +class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id, + (outs R600_Reg64:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 8; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 7; + let DST_SEL_W = 7; + let DATA_FORMAT = 0x1D; // COLOR_32_32 +} + +class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, + (outs R600_Reg128:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 + + // XXX: Need to force VTX_READ_128 instructions to write to the same register + // that holds its buffer address to avoid potential hangs. We can't use + // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst + // registers are different sizes. +} + +//===----------------------------------------------------------------------===// +// VTX Read from parameter memory space +//===----------------------------------------------------------------------===// + +def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, + [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, + [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, + [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0, + [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, + [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +//===----------------------------------------------------------------------===// +// VTX Read from global memory space +//===----------------------------------------------------------------------===// + +// 8-bit reads +def 
VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, + [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1, + [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))] +>; + +// 32-bit reads +def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, + [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + +// 64-bit reads +def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1, + [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + +// 128-bit reads +def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, + [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + } // End Predicates = [isEG] //===----------------------------------------------------------------------===// @@ -1620,10 +1521,11 @@ let Predicates = [isEGorCayman] in { def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>; defm : BFIPatterns <BFI_INT_eg>; - def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", - [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))], - VecALU + def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24", + [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU >; + def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>; + def : ROTRPattern <BIT_ALIGN_INT_eg>; def MULADD_eg : MULADD_Common<0x14>; def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>; @@ -1635,7 +1537,10 @@ let Predicates = [isEGorCayman] in { def CNDGE_eg : CNDGE_Common<0x1B>; def MUL_LIT_eg : MUL_LIT_Common<0x1F>; def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; - defm DOT4_eg : DOT4_Common<0xBE>; + def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24", + [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU + >; + def DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; let hasSideEffects = 1 in { @@ -1646,6 +1551,7 @@ let hasSideEffects = 1 in { def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { let Pattern = []; + let Itinerary = AnyALU; } def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; @@ -1656,6 +1562,165 @@ 
let hasSideEffects = 1 in { def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; +def GROUP_BARRIER : InstR600 < + (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>, + R600ALU_Word0, + R600ALU_Word1_OP2 <0x54> { + + let dst = 0; + let dst_rel = 0; + let src0 = 0; + let src0_rel = 0; + let src0_neg = 0; + let src0_abs = 0; + let src1 = 0; + let src1_rel = 0; + let src1_neg = 0; + let src1_abs = 0; + let write = 0; + let omod = 0; + let clamp = 0; + let last = 1; + let bank_swizzle = 0; + let pred_sel = 0; + let update_exec_mask = 0; + let update_pred = 0; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + + let ALUInst = 1; +} + +//===----------------------------------------------------------------------===// +// LDS Instructions +//===----------------------------------------------------------------------===// +class R600_LDS <bits<6> op, dag outs, dag ins, string asm, + list<dag> pattern = []> : + + InstR600 <outs, ins, asm, pattern, XALU>, + R600_ALU_LDS_Word0, + R600LDS_Word1 { + + bits<6> offset = 0; + let lds_op = op; + + let Word1{27} = offset{0}; + let Word1{12} = offset{1}; + let Word1{28} = offset{2}; + let Word1{31} = offset{3}; + let Word0{12} = offset{4}; + let Word0{25} = offset{5}; + + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + + let ALUInst = 1; + let HasNativeOperands = 1; + let UseNamedOperandTable = 1; +} + +class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS < + lds_op, + (outs R600_Reg32:$dst), + (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, + LAST:$last, R600_Pred:$pred_sel, + BANK_SWIZZLE:$bank_swizzle), + " "#name#" $last OQAP, $src0$src0_rel $pred_sel", + pattern + > { + + let src1 = 0; + let src1_rel = 0; + let src2 = 0; + let src2_rel = 0; + + let Defs = [OQAP]; + let usesCustomInserter = 1; + let LDS_1A = 1; + let DisableEncoding = "$dst"; +} + +class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern, + string dst =""> : + R600_LDS < + lds_op, outs, + 
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, + LAST:$last, R600_Pred:$pred_sel, + BANK_SWIZZLE:$bank_swizzle), + " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel", + pattern + > { + + field string BaseOp; + + let src2 = 0; + let src2_rel = 0; + let LDS_1A1D = 1; +} + +class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> : + R600_LDS_1A1D <lds_op, (outs), name, pattern> { + let BaseOp = name; +} + +class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> : + R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> { + + let BaseOp = name; + let usesCustomInserter = 1; + let DisableEncoding = "$dst"; + let Defs = [OQAP]; +} + +class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> : + R600_LDS < + lds_op, + (outs), + (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, + R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel, + LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle), + " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel", + pattern> { + let LDS_1A2D = 1; +} + +def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >; +def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >; +def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE", + [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] +>; +def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE", + [(truncstorei8_local i32:$src1, i32:$src0)] +>; +def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", + [(truncstorei16_local i32:$src1, i32:$src0)] +>; +def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", + [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] +>; +def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB", + [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))] +>; +def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", + [(set (i32 
R600_Reg32:$dst), (local_load R600_Reg32:$src0))] +>; +def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET", + [(set i32:$dst, (sextloadi8_local i32:$src0))] +>; +def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET", + [(set i32:$dst, (az_extloadi8_local i32:$src0))] +>; +def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET", + [(set i32:$dst, (sextloadi16_local i32:$src0))] +>; +def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", + [(set i32:$dst, (az_extloadi16_local i32:$src0))] +>; + // TRUNC is used for the FLT_TO_INT instructions to work around a // perceived problem where the rounding modes are applied differently // depending on the instruction and the slot they are in. @@ -1673,9 +1738,11 @@ let hasSideEffects = 1 in { // SHA-256 Patterns def : SHA256MaPattern <BFI_INT_eg, XOR_INT>; + def : FROUNDPat <CNDGE_eg>; + def EG_ExportSwz : ExportSwzInst { let Word1{19-16} = 0; // BURST_COUNT - let Word1{20} = 1; // VALID_PIXEL_MODE + let Word1{20} = 0; // VALID_PIXEL_MODE let Word1{21} = eop; let Word1{29-22} = inst; let Word1{30} = 0; // MARK @@ -1685,7 +1752,7 @@ let hasSideEffects = 1 in { def EG_ExportBuf : ExportBufInst { let Word1{19-16} = 0; // BURST_COUNT - let Word1{20} = 1; // VALID_PIXEL_MODE + let Word1{20} = 0; // VALID_PIXEL_MODE let Word1{21} = eop; let Word1{29-22} = inst; let Word1{30} = 0; // MARK @@ -1744,48 +1811,78 @@ let hasSideEffects = 1 in { let END_OF_PROGRAM = 1; } +} // End Predicates = [isEGorCayman] + //===----------------------------------------------------------------------===// -// Memory read/write instructions +// Regist loads and stores - for indirect addressing //===----------------------------------------------------------------------===// -let usesCustomInserter = 1 in { -class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name, - list<dag> pattern> - : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, - !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> { - let RIM = 
0; - // XXX: Have a separate instruction for non-indexed writes. - let TYPE = 1; - let RW_REL = 0; - let ELEM_SIZE = 0; +defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; - let ARRAY_SIZE = 0; - let COMP_MASK = comp_mask; - let BURST_COUNT = 0; - let VPM = 0; - let MARK = 0; - let BARRIER = 1; -} +//===----------------------------------------------------------------------===// +// Cayman Instructions +//===----------------------------------------------------------------------===// -} // End usesCustomInserter = 1 +let Predicates = [isCayman] in { -// 32-bit store -def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0x1, "RAT_WRITE_CACHELESS_32_eg", - [(global_store i32:$rw_gpr, i32:$index_gpr)] +def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24", + [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))], VecALU +>; +def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24", + [(set i32:$dst, (mul I24:$src0, I24:$src1))], VecALU >; -//128-bit store -def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < - (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0xf, "RAT_WRITE_CACHELESS_128", - [(global_store v4i32:$rw_gpr, i32:$index_gpr)] +let isVector = 1 in { + +def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; + +def MULLO_INT_cm : MULLO_INT_Common<0x8F>; +def MULHI_INT_cm : MULHI_INT_Common<0x90>; +def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; +def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; +def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; +def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; +def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; +def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; +def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; +def SIN_cm : SIN_Common<0x8D>; +def COS_cm : COS_Common<0x8E>; +} // End isVector = 1 + +def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; + +defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; + +// RECIP_UINT emulation for Cayman +// 
The multiplication scales from [0,1] to the unsigned integer range +def : Pat < + (AMDGPUurecip i32:$src0), + (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), + (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) >; -class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> - : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>, - VTX_WORD1_GPR, VTX_WORD0 { + def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { + let ADDR = 0; + let POP_COUNT = 0; + let COUNT = 0; + } + +def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; + +class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> : + CF_MEM_RAT_CACHELESS <0x14, 0, mask, + (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr), + "STORE_DWORD $rw_gpr, $index_gpr", + [(global_store vt:$rw_gpr, i32:$index_gpr)]> { + let eop = 0; // This bit is not used on Cayman. +} + +def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>; +def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>; +def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>; + +class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern> + : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> { // Static fields let VC_INST = 0; @@ -1796,53 +1893,18 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> // XXX: We can infer this field based on the SRC_GPR. This would allow us // to store vertex addresses in any channel, not just X. let SRC_SEL_X = 0; - let DST_REL = 0; - // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, - // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, - // however, based on my testing if USE_CONST_FIELDS is set, then all - // these fields need to be set to 0. 
- let USE_CONST_FIELDS = 0; - let NUM_FORMAT_ALL = 1; - let FORMAT_COMP_ALL = 0; - let SRF_MODE_ALL = 0; + let SRC_SEL_Y = 0; + let STRUCTURED_READ = 0; + let LDS_REQ = 0; + let COALESCED_READ = 0; let Inst{31-0} = Word0; - let Inst{63-32} = Word1; - // LLVM can only encode 64-bit instructions, so these fields are manually - // encoded in R600CodeEmitter - // - // bits<16> OFFSET; - // bits<2> ENDIAN_SWAP = 0; - // bits<1> CONST_BUF_NO_STRIDE = 0; - // bits<1> MEGA_FETCH = 0; - // bits<1> ALT_CONST = 0; - // bits<2> BUFFER_INDEX_MODE = 0; - - - - // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding - // is done in R600CodeEmitter - // - // Inst{79-64} = OFFSET; - // Inst{81-80} = ENDIAN_SWAP; - // Inst{82} = CONST_BUF_NO_STRIDE; - // Inst{83} = MEGA_FETCH; - // Inst{84} = ALT_CONST; - // Inst{86-85} = BUFFER_INDEX_MODE; - // Inst{95-86} = 0; Reserved - - // VTX_WORD3 (Padding) - // - // Inst{127-96} = 0; - - let VTXInst = 1; } -class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { +class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { - let MEGA_FETCH_COUNT = 1; let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1850,10 +1912,9 @@ class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> let DATA_FORMAT = 1; // FMT_8 } -class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { - let MEGA_FETCH_COUNT = 2; +class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1862,11 +1923,10 @@ class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> } -class 
VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { +class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { - let MEGA_FETCH_COUNT = 4; let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1875,19 +1935,29 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> // This is not really necessary, but there were some GPU hangs that appeared // to be caused by ALU instructions in the next instruction group that wrote - // to the $ptr registers of the VTX_READ. + // to the $src_gpr registers of the VTX_READ. // e.g. // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24 // %T2_X<def> = MOV %ZERO //Adding this constraint prevents this from happening. - let Constraints = "$ptr.ptr = $dst"; + let Constraints = "$src_gpr.ptr = $dst_gpr"; } -class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> - : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst), - pattern> { +class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id, + (outs R600_Reg64:$dst_gpr), pattern> { + + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 7; + let DST_SEL_W = 7; + let DATA_FORMAT = 0x1D; // COLOR_32_32 +} + +class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, + (outs R600_Reg128:$dst_gpr), pattern> { - let MEGA_FETCH_COUNT = 16; let DST_SEL_X = 0; let DST_SEL_Y = 1; let DST_SEL_Z = 2; @@ -1896,28 +1966,31 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> // XXX: Need to force VTX_READ_128 instructions to write to the same register // that holds its buffer address to avoid potential hangs. 
We can't use - // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst + // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst // registers are different sizes. } //===----------------------------------------------------------------------===// // VTX Read from parameter memory space //===----------------------------------------------------------------------===// +def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0, + [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))] +>; -def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, - [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))] +def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0, + [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, - [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))] +def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0, + [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, - [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))] +def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0, + [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, - [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))] +def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0, + [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; //===----------------------------------------------------------------------===// @@ -1925,78 +1998,29 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, //===----------------------------------------------------------------------===// // 8-bit reads -def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, - [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))] +def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1, + [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))] >; -// 32-bit reads -def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, - [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))] +def VTX_READ_GLOBAL_16_cm : 
VTX_READ_16_cm <1, + [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))] >; -// 128-bit reads -def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, - [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))] +// 32-bit reads +def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1, + [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] >; -//===----------------------------------------------------------------------===// -// Constant Loads -// XXX: We are currently storing all constants in the global address space. -//===----------------------------------------------------------------------===// - -def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, - [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))] +// 64-bit reads +def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1, + [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] >; -} - -//===----------------------------------------------------------------------===// -// Regist loads and stores - for indirect addressing -//===----------------------------------------------------------------------===// - -defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; - -let Predicates = [isCayman] in { - -let isVector = 1 in { - -def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; - -def MULLO_INT_cm : MULLO_INT_Common<0x8F>; -def MULHI_INT_cm : MULHI_INT_Common<0x90>; -def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; -def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; -def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; -def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; -def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; -def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; -def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; -def SIN_cm : SIN_Common<0x8D>; -def COS_cm : COS_Common<0x8E>; -} // End isVector = 1 - -def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; -def : SIN_PAT <SIN_cm>; -def : COS_PAT <COS_cm>; - -defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; - -// RECIP_UINT emulation for Cayman -// The multiplication scales from [0,1] to the unsigned 
integer range -def : Pat < - (AMDGPUurecip i32:$src0), - (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), - (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) +// 128-bit reads +def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1, + [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] >; - def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { - let ADDR = 0; - let POP_COUNT = 0; - let COUNT = 0; - } - -def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - } // End isCayman //===----------------------------------------------------------------------===// @@ -2007,9 +2031,6 @@ def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src), "IF_PREDICATE_SET $src", []>; -def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src), - "PREDICATED_BREAK $src", []>; - //===----------------------------------------------------------------------===// // Pseudo instructions //===----------------------------------------------------------------------===// @@ -2083,10 +2104,6 @@ def TXD_SHADOW: InstR600 < } // End isPseudo = 1 } // End usesCustomInserter = 1 -def CLAMP_R600 : CLAMP <R600_Reg32>; -def FABS_R600 : FABS<R600_Reg32>; -def FNEG_R600 : FNEG<R600_Reg32>; - //===---------------------------------------------------------------------===// // Return instruction //===---------------------------------------------------------------------===// @@ -2117,7 +2134,7 @@ def CONST_COPY : Instruction { def TEX_VTX_CONSTBUF : InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr", [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, - VTX_WORD1_GPR, VTX_WORD0 { + VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; let FETCH_TYPE = 2; @@ -2171,7 +2188,7 @@ def TEX_VTX_CONSTBUF : def TEX_VTX_TEXBUF: InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", 
[(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, -VTX_WORD1_GPR, VTX_WORD0 { +VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; let FETCH_TYPE = 2; @@ -2235,7 +2252,7 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { def BRANCH : ILFormat<(outs), (ins brtarget:$target), "; Pseudo unconditional branch instruction", [(br bb:$target)]>; - defm BRANCH_COND : BranchConditional<IL_brcond>; + defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>; } //===---------------------------------------------------------------------===// @@ -2306,7 +2323,7 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>; //CNDGE_INT extra pattern def : Pat < - (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT), + (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT), (CNDGE_INT $src0, $src1, $src2) >; @@ -2321,86 +2338,6 @@ def KIL : Pat < (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; -// SGT Reverse args -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT), - (SGT $src1, $src0) ->; - -// SGE Reverse args -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE), - (SGE $src1, $src0) ->; - -// SETGT_DX10 reverse args -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT), - (SETGT_DX10 $src1, $src0) ->; - -// SETGE_DX10 reverse args -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE), - (SETGE_DX10 $src1, $src0) ->; - -// SETGT_INT reverse args -def : Pat < - (selectcc i32:$src0, i32:$src1, -1, 0, SETLT), - (SETGT_INT $src1, $src0) ->; - -// SETGE_INT reverse args -def : Pat < - (selectcc i32:$src0, i32:$src1, -1, 0, SETLE), - (SETGE_INT $src1, $src0) ->; - -// SETGT_UINT reverse args -def : Pat < - (selectcc i32:$src0, i32:$src1, -1, 0, SETULT), - (SETGT_UINT $src1, $src0) ->; - -// SETGE_UINT reverse args -def : Pat < - (selectcc i32:$src0, i32:$src1, -1, 0, SETULE), - (SETGE_UINT $src1, $src0) ->; - -// The next two patterns are special cases for handling 'true if 
ordered' and -// 'true if unordered' conditionals. The assumption here is that the behavior of -// SETE and SNE conforms to the Direct3D 10 rules for floating point values -// described here: -// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit -// We assume that SETE returns false when one of the operands is NAN and -// SNE returns true when on of the operands is NAN - -//SETE - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO), - (SETE $src0, $src1) ->; - -//SETE_DX10 - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, SETO), - (SETE_DX10 $src0, $src1) ->; - -//SNE - 'true if unordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO), - (SNE $src0, $src1) ->; - -//SETNE_DX10 - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, SETUO), - (SETNE_DX10 $src0, $src1) ->; - def : Extract_Element <f32, v4f32, 0, sub0>; def : Extract_Element <f32, v4f32, 1, sub1>; def : Extract_Element <f32, v4f32, 2, sub2>; @@ -2424,10 +2361,24 @@ def : Insert_Element <i32, v4i32, 3, sub3>; def : Vector4_Build <v4f32, f32>; def : Vector4_Build <v4i32, i32>; +def : Extract_Element <f32, v2f32, 0, sub0>; +def : Extract_Element <f32, v2f32, 1, sub1>; + +def : Insert_Element <f32, v2f32, 0, sub0>; +def : Insert_Element <f32, v2f32, 1, sub1>; + +def : Extract_Element <i32, v2i32, 0, sub0>; +def : Extract_Element <i32, v2i32, 1, sub1>; + +def : Insert_Element <i32, v2i32, 0, sub0>; +def : Insert_Element <i32, v2i32, 1, sub1>; + // bitconvert patterns def : BitConvert <i32, f32, R600_Reg32>; def : BitConvert <f32, i32, R600_Reg32>; +def : BitConvert <v2f32, v2i32, R600_Reg64>; +def : BitConvert <v2i32, v2f32, R600_Reg64>; def : BitConvert <v4f32, v4i32, R600_Reg128>; def : BitConvert <v4i32, v4f32, R600_Reg128>; @@ -2435,3 +2386,11 @@ def : BitConvert <v4i32, v4f32, R600_Reg128>; def : DwordAddrPat <i32, R600_Reg32>; } // End isR600toCayman Predicate + +def 
getLDSNoRetOp : InstrMapping { + let FilterClass = "R600_LDS_1A1D"; + let RowFields = ["BaseOp"]; + let ColFields = ["DisableEncoding"]; + let KeyCol = ["$dst"]; + let ValueCols = [[""""]]; +} |