summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td')
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td134
1 files changed, 104 insertions, 30 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 8cae83c..96b33c3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -23,18 +23,27 @@ class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
bits<8> vdst;
-
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f; // encoding
+}
+
+class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
+ bits<8> vdst;
+
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = op;
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
let Inst{31-25} = 0x3f; // encoding
}
-class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
+class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
InstSI <P.Outs32, P.Ins32, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
+ SIMCInstr <!if(VOP1Only, opName, opName#"_e32"), SIEncodingFamily.NONE>,
+ MnemonicAlias<!if(VOP1Only, opName, opName#"_e32"), opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -75,6 +84,8 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
@@ -83,10 +94,17 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
}
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
- list<dag> ret = !if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
+ list<dag> ret =
+ !if(P.HasModifiers,
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers,
+ i1:$clamp, i32:$omod))))],
+ !if(P.HasOMod,
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
+ i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
+ )
+ );
}
multiclass VOP1Inst <string opName, VOPProfile P,
@@ -96,6 +114,23 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
}
+// Special profile for instructions which have clamp
+// and output modifiers (but have no input modifiers)
+class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
+ VOPProfile<[dstVt, srcVt, untyped, untyped]> {
+
+ let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
+ let Asm64 = "$vdst, $src0$clamp$omod";
+
+ let HasModifiers = 0;
+ let HasClamp = 1;
+ let HasOMod = 1;
+}
+
+def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
+def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
+def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
+
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
@@ -142,24 +177,24 @@ def V_READFIRSTLANE_B32 :
let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
-defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
-defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
-defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
+defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
+defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
+defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
-defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
-defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
+defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
+defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
-defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
-defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
-defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
-defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
-defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
+defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
+defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
+defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
+defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
-defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteQuarterRate32]
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
@@ -217,6 +252,7 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC64 = VRegSrc_32;
let HasExt = 0;
+ let HasSDWA9 = 0;
}
// Special case because there are no true output operands. Hack vdst
@@ -232,16 +268,19 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
- let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, VCSrc_b32:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+
+ let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+ clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
- let Asm64 = getAsm64<1, 1, 0>.ret;
+ let Asm64 = getAsm64<1, 1, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
- let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
+ let AsmSDWA = getAsmSDWA<1, 1>.ret;
+ let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
let HasExt = 0;
+ let HasSDWA9 = 0;
let HasDst = 0;
let EmitDst = 1; // force vdst emission
}
@@ -258,11 +297,14 @@ defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End Uses = [M0, EXEC]
+let SchedRW = [WriteQuarterRate32] in {
+defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
+}
+
// These instruction only exist on SI and CI
let SubtargetPredicate = isSICI in {
let SchedRW = [WriteQuarterRate32] in {
-defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
@@ -295,10 +337,10 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
} // End SubtargetPredicate = isCIVI
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = Has16BitInsts in {
-defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>;
-defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>;
+defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
+defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
@@ -318,7 +360,7 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
}
-let Predicates = [isVI] in {
+let Predicates = [Has16BitInsts] in {
def : Pat<
(f32 (f16_to_fp i16:$src)),
@@ -326,12 +368,31 @@ def : Pat<
>;
def : Pat<
- (i16 (fp_to_f16 f32:$src)),
+ (i16 (AMDGPUfp_to_f16 f32:$src)),
(V_CVT_F16_F32_e32 $src)
>;
}
+def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+ let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
+ let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
+ let Outs64 = Outs32;
+ let Asm32 = " $vdst, $src0";
+ let Asm64 = "";
+ let Ins64 = (ins);
+}
+
+let SubtargetPredicate = isGFX9 in {
+ let Constraints = "$vdst = $src1, $vdst1 = $src0",
+ DisableEncoding="$vdst1,$src1",
+ SchedRW = [Write64Bit, Write64Bit] in {
+// Never VOP3. Takes as long as 2 v_mov_b32s
+def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>;
+}
+
+} // End SubtargetPredicate = isGFX9
+
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//
@@ -453,6 +514,14 @@ class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
let Inst{31-25} = 0x3f; //encoding
}
+multiclass VOP1Only_Real_vi <bits<10> op> {
+ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ def _vi :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
+ }
+}
+
multiclass VOP1_Real_vi <bits<10> op> {
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
def _e32_vi :
@@ -467,6 +536,10 @@ multiclass VOP1_Real_vi <bits<10> op> {
VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+
// For now left dpp only for asm/dasm
// TODO: add corresponding pseudo
def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
@@ -480,6 +553,7 @@ defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
+defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
@@ -547,7 +621,7 @@ defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
-
+defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;
// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
OpenPOWER on IntegriCloud