diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td | 134 |
1 files changed, 104 insertions, 30 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 8cae83c..96b33c3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -23,18 +23,27 @@ class VOP1e <bits<8> op, VOPProfile P> : Enc32 { class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> { bits<8> vdst; - + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; // encoding +} + +class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> { + bits<8> vdst; + let Inst{8-0} = 0xf9; // sdwa let Inst{16-9} = op; let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); let Inst{31-25} = 0x3f; // encoding } -class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : +class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> : InstSI <P.Outs32, P.Ins32, "", pattern>, VOP <opName>, - SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>, - MnemonicAlias<opName#"_e32", opName> { + SIMCInstr <!if(VOP1Only, opName, opName#"_e32"), SIEncodingFamily.NONE>, + MnemonicAlias<!if(VOP1Only, opName, opName#"_e32"), opName> { let isPseudo = 1; let isCodeGenOnly = 1; @@ -75,6 +84,8 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> : let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; let TSFlags = ps.TSFlags; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let Uses = ps.Uses; } class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : @@ -83,10 +94,17 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : } class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { - list<dag> ret = !if(P.HasModifiers, - [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, - i32:$src0_modifiers, i1:$clamp, i32:$omod))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]); + list<dag> ret = + !if(P.HasModifiers, + [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, + i32:$src0_modifiers, + i1:$clamp, i32:$omod))))], + !if(P.HasOMod, + [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0, + i1:$clamp, i32:$omod))))], + [(set P.DstVT:$vdst, (node P.Src0VT:$src0))] + ) + ); } multiclass VOP1Inst <string opName, VOPProfile P, @@ -96,6 +114,23 @@ multiclass VOP1Inst <string opName, VOPProfile P, def _sdwa : VOP1_SDWA_Pseudo <opName, P>; } +// Special profile for instructions which have clamp +// and output modifiers (but have no input modifiers) +class VOPProfileI2F<ValueType dstVt, ValueType srcVt> : + VOPProfile<[dstVt, srcVt, untyped, untyped]> { + + let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); + let Asm64 = "$vdst, $src0$clamp$omod"; + + let HasModifiers = 0; + let HasClamp = 1; + let HasOMod = 1; +} + +def VOP1_F64_I32 : VOPProfileI2F <f64, i32>; +def VOP1_F32_I32 : VOPProfileI2F <f32, i32>; +def VOP1_F16_I16 : VOPProfileI2F <f16, i16>; + //===----------------------------------------------------------------------===// // VOP1 Instructions //===----------------------------------------------------------------------===// @@ -142,24 +177,24 @@ def V_READFIRSTLANE_B32 : let SchedRW = [WriteQuarterRate32] in { defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; -defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>; -defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>; -defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>; +defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>; +defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>; +defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>; -defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>; -defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>; +defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; +defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>; defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; -defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>; +defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>; defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; -defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>; -defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>; -defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>; -defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>; +defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>; +defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>; +defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>; +defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>; defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; -defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>; +defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; } // End SchedRW = [WriteQuarterRate32] defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; @@ -217,6 +252,7 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC64 = VRegSrc_32; let HasExt = 0; + let HasSDWA9 = 0; } // Special case because there are no true output operands. Hack vdst @@ -232,16 +268,19 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, VCSrc_b32:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + + let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); let Asm32 = getAsm32<1, 1>.ret; - let Asm64 = getAsm64<1, 1, 0>.ret; + let Asm64 = getAsm64<1, 1, 0, 1>.ret; let AsmDPP = getAsmDPP<1, 1, 0>.ret; - let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; + let AsmSDWA = getAsmSDWA<1, 1>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; let HasExt = 0; + let HasSDWA9 = 0; let HasDst = 0; let EmitDst = 1; // force vdst emission } @@ -258,11 +297,14 @@ defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>; defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>; } // End Uses = [M0, EXEC] +let SchedRW = [WriteQuarterRate32] in { +defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; +} + // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { let SchedRW = [WriteQuarterRate32] in { -defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>; defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>; defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>; @@ -295,10 +337,10 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { -defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>; -defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>; +defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; +defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>; defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>; defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; @@ -318,7 +360,7 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { def : Pat< (f32 (f16_to_fp i16:$src)), @@ -326,12 +368,31 @@ def : Pat< >; def : Pat< - (i16 (fp_to_f16 f32:$src)), + (i16 (AMDGPUfp_to_f16 f32:$src)), (V_CVT_F16_F32_e32 $src) >; } +def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> { + let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1); + let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1); + let Outs64 = Outs32; + let Asm32 = " $vdst, $src0"; + let Asm64 = ""; + let Ins64 = (ins); +} + +let SubtargetPredicate = isGFX9 in { + let Constraints = "$vdst = $src1, $vdst1 = $src0", + DisableEncoding="$vdst1,$src1", + SchedRW = [Write64Bit, Write64Bit] in { +// Never VOP3. Takes as long as 2 v_mov_b32s +def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>; +} + +} // End SubtargetPredicate = isGFX9 + //===----------------------------------------------------------------------===// // Target //===----------------------------------------------------------------------===// @@ -453,6 +514,14 @@ class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> : let Inst{31-25} = 0x3f; //encoding } +multiclass VOP1Only_Real_vi <bits<10> op> { + let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { + def _vi : + VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>, + VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; + } +} + multiclass VOP1_Real_vi <bits<10> op> { let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { def _e32_vi : @@ -467,6 +536,10 @@ multiclass VOP1_Real_vi <bits<10> op> { VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, + VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; + // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>; @@ -480,6 +553,7 @@ defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>; defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>; defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>; defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>; +defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>; defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>; defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>; defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>; @@ -547,7 +621,7 @@ defm V_RNDNE_F16 : VOP1_Real_vi <0x47>; defm V_FRACT_F16 : VOP1_Real_vi <0x48>; defm V_SIN_F16 : VOP1_Real_vi <0x49>; defm V_COS_F16 : VOP1_Real_vi <0x4a>; - +defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>; // Copy of v_mov_b32 with $vdst as a use operand for use with VGPR // indexing mode. vdst can't be treated as a def for codegen purposes, |