Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIInstructions.td')
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIInstructions.td | 338
1 file changed, 273 insertions(+), 65 deletions(-)
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index 38e31e7..ba69e42 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -94,6 +94,12 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
 //===----------------------------------------------------------------------===//
 // Pseudo Instructions
 //===----------------------------------------------------------------------===//
+def ATOMIC_FENCE : SPseudoInstSI<
+  (outs), (ins i32imm:$ordering, i32imm:$scope),
+  [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))],
+  "ATOMIC_FENCE $ordering, $scope"> {
+  let hasSideEffects = 1;
+}

 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {

@@ -146,6 +152,8 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
   let mayStore = 1;
   let isBarrier = 1;
   let isConvergent = 1;
+  let FixedSize = 1;
+  let Size = 0;
 }

 // SI pseudo instructions. These are used by the CFG structurizer pass
@@ -153,48 +161,51 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),

 // Dummy terminator instruction to use after control flow instructions
 // replaced with exec mask operations.
-def SI_MASK_BRANCH : PseudoInstSI <
+def SI_MASK_BRANCH : VPseudoInstSI <
   (outs), (ins brtarget:$target)> {
   let isBranch = 0;
   let isTerminator = 1;
   let isBarrier = 0;
-  let Uses = [EXEC];
   let SchedRW = [];
   let hasNoSchedulingInfo = 1;
+  let FixedSize = 1;
+  let Size = 0;
 }

 let isTerminator = 1 in {

+def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
+  (outs),
+  (ins SReg_64:$vcc, brtarget:$target),
+  [(brcond i1:$vcc, bb:$target)]> {
+  let Size = 12;
+}
+
 def SI_IF: CFPseudoInstSI <
   (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
-  [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {
+  [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
   let Constraints = "";
   let Size = 12;
-  let mayLoad = 1;
-  let mayStore = 1;
   let hasSideEffects = 1;
 }

 def SI_ELSE : CFPseudoInstSI <
-  (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+  (outs SReg_64:$dst),
+  (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
   let Constraints = "$src = $dst";
   let Size = 12;
-  let mayStore = 1;
-  let mayLoad = 1;
   let hasSideEffects = 1;
 }

 def SI_LOOP : CFPseudoInstSI <
   (outs), (ins SReg_64:$saved, brtarget:$target),
-  [(int_amdgcn_loop i64:$saved, bb:$target)], 1, 1> {
+  [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
   let Size = 8;
-  let isBranch = 1;
+  let isBranch = 0;
   let hasSideEffects = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
 }

-} // End isBranch = 1, isTerminator = 1
+} // End isTerminator = 1

 def SI_END_CF : CFPseudoInstSI <
   (outs), (ins SReg_64:$saved),
@@ -202,9 +213,9 @@ def SI_END_CF : CFPseudoInstSI <
   let Size = 4;
   let isAsCheapAsAMove = 1;
   let isReMaterializable = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
   let hasSideEffects = 1;
+  let mayLoad = 1; // FIXME: Should not need memory flags
+  let mayStore = 1;
 }

 def SI_BREAK : CFPseudoInstSI <
@@ -244,6 +255,10 @@ def SI_KILL_TERMINATOR : SPseudoInstSI <
   let isTerminator = 1;
 }

+def SI_ILLEGAL_COPY : SPseudoInstSI <
+  (outs unknown:$dst), (ins unknown:$src),
+  [], " ; illegal copy $src to $dst">;
+
 } // End Uses = [EXEC], Defs = [EXEC,VCC]

 // Branch on undef scc. Used to avoid intermediate copy from
@@ -259,6 +274,14 @@ def SI_PS_LIVE : PseudoInstSI <
   let SALU = 1;
 }

+def SI_MASKED_UNREACHABLE : SPseudoInstSI <(outs), (ins),
+  [(int_amdgcn_unreachable)],
+  "; divergent unreachable"> {
+  let Size = 0;
+  let hasNoSchedulingInfo = 1;
+  let FixedSize = 1;
+}
+
 // Used as an isel pseudo to directly emit initialization with an
 // s_mov_b32 rather than a copy of another initialized
 // register. MachineCSE skips copies, and we don't want to have to
@@ -270,12 +293,25 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
   let isReMaterializable = 1;
 }

-def SI_RETURN : SPseudoInstSI <
-  (outs), (ins variable_ops), [(AMDGPUreturn)]> {
+def SI_INIT_EXEC : SPseudoInstSI <
+  (outs), (ins i64imm:$src), []> {
+  let Defs = [EXEC];
+  let usesCustomInserter = 1;
+  let isAsCheapAsAMove = 1;
+}
+
+def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
+  (outs), (ins SSrc_b32:$input, i32imm:$shift), []> {
+  let Defs = [EXEC];
+  let usesCustomInserter = 1;
+}
+
+// Return for returning shaders to a shader variant epilog.
+def SI_RETURN_TO_EPILOG : SPseudoInstSI <
+  (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
   let isTerminator = 1;
   let isBarrier = 1;
   let isReturn = 1;
-  let hasSideEffects = 1;
   let hasNoSchedulingInfo = 1;
   let DisableWQM = 1;
 }
@@ -383,9 +419,23 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
 } // End SubtargetPredicate = isGCN

 let Predicates = [isGCN] in {
+def : Pat <
+  (AMDGPUinit_exec i64:$src),
+  (SI_INIT_EXEC (as_i64imm $src))
+>;
+
+def : Pat <
+  (AMDGPUinit_exec_from_input i32:$input, i32:$shift),
+  (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift))
+>;
+
+def : Pat<
+  (AMDGPUtrap timm:$trapid),
+  (S_TRAP $trapid)
+>;

 def : Pat<
-  (int_amdgcn_else i64:$src, bb:$target),
+  (AMDGPUelse i64:$src, bb:$target),
   (SI_ELSE $src, $target, 0)
 >;

@@ -423,24 +473,37 @@ def : Pat <

 } // End Predicates = [UnsafeFPMath]

+
+// f16_to_fp patterns
 def : Pat <
-  (f32 (fpextend f16:$src)),
-  (V_CVT_F32_F16_e32 $src)
+  (f32 (f16_to_fp i32:$src0)),
+  (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;

 def : Pat <
-  (f64 (fpextend f16:$src)),
-  (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
+  (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))),
+  (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;

 def : Pat <
-  (f16 (fpround f32:$src)),
-  (V_CVT_F16_F32_e32 $src)
+  (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))),
+  (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;

 def : Pat <
-  (f16 (fpround f64:$src)),
-  (V_CVT_F16_F32_e32 (V_CVT_F32_F64_e32 $src))
+  (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))),
+  (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : Pat <
+  (f64 (fpextend f16:$src)),
+  (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
+>;
+
+// fp_to_fp16 patterns
+def : Pat <
+  (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+  (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;

 def : Pat <
@@ -469,17 +532,27 @@ def : Pat <

 multiclass FMADPat <ValueType vt, Instruction inst> {
   def : Pat <
-    (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
-              (VOP3NoMods vt:$src1, i32:$src1_modifiers),
-              (VOP3NoMods vt:$src2, i32:$src2_modifiers))),
-    (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
-      $src2_modifiers, $src2, $clamp, $omod)
+    (vt (fmad (VOP3NoMods vt:$src0),
+              (VOP3NoMods vt:$src1),
+              (VOP3NoMods vt:$src2))),
+    (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+          SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
   >;
 }

 defm : FMADPat <f16, V_MAC_F16_e64>;
 defm : FMADPat <f32, V_MAC_F32_e64>;

+class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : Pat<
+  (f32 (mad_opr (VOP3Mods f32:$src0, i32:$src0_mod),
+                (VOP3Mods f32:$src1, i32:$src1_mod),
+                (VOP3Mods f32:$src2, i32:$src2_mod))),
+  (inst $src0_mod, $src0, $src1_mod, $src1,
+  $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz>;
+
 multiclass SelectPat <ValueType vt, Instruction inst> {
   def : Pat <
     (vt (select i1:$src0, vt:$src1, vt:$src2)),
@@ -578,6 +651,16 @@ def : BitConvert <i32, f32, VGPR_32>;
 def : BitConvert <f32, i32, VGPR_32>;
 def : BitConvert <i32, f32, SReg_32>;
 def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <v2i16, i32, SReg_32>;
+def : BitConvert <i32, v2i16, SReg_32>;
+def : BitConvert <v2f16, i32, SReg_32>;
+def : BitConvert <i32, v2f16, SReg_32>;
+def : BitConvert <v2i16, v2f16, SReg_32>;
+def : BitConvert <v2f16, v2i16, SReg_32>;
+def : BitConvert <v2f16, f32, SReg_32>;
+def : BitConvert <f32, v2f16, SReg_32>;
+def : BitConvert <v2i16, f32, SReg_32>;
+def : BitConvert <f32, v2i16, SReg_32>;

 // 64-bit bitcast
 def : BitConvert <i64, f64, VReg_64>;
@@ -619,12 +702,19 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
 /********** Src & Dst modifiers **********/
 /********** =================== **********/

-def : Pat <
-  (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
-               (f32 FP_ZERO), (f32 FP_ONE)),
-  (V_ADD_F32_e64 $src0_modifiers, $src0, 0, (i32 0), 1, $omod)
+
+// If denormals are not enabled, it only impacts the compare of the
+// inputs. The output result is not flushed.
+class ClampPat<Instruction inst, ValueType vt> : Pat <
+  (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))),
+  (inst i32:$src0_modifiers, vt:$src0,
+        i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE)
 >;

+def : ClampPat<V_MAX_F32_e64, f32>;
+def : ClampPat<V_MAX_F64, f64>;
+def : ClampPat<V_MAX_F16_e64, f16>;
+
 /********** ================================ **********/
 /********** Floating point absolute/negative **********/
 /********** ================================ **********/
@@ -678,6 +768,37 @@ def : Pat <
 >;

 def : Pat <
+  (fcopysign f16:$src0, f16:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
+>;
+
+def : Pat <
+  (fcopysign f32:$src0, f16:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), $src0,
+             (V_LSHLREV_B32_e64 (i32 16), $src1))
+>;
+
+def : Pat <
+  (fcopysign f64:$src0, f16:$src1),
+  (REG_SEQUENCE SReg_64,
+    (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+    (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)),
+               (V_LSHLREV_B32_e64 (i32 16), $src1)), sub1)
+>;
+
+def : Pat <
+  (fcopysign f16:$src0, f32:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+             (V_LSHRREV_B32_e64 (i32 16), $src1))
+>;
+
+def : Pat <
+  (fcopysign f16:$src0, f64:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+             (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
+>;
+
+def : Pat <
   (fneg f16:$src),
   (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
 >;
@@ -692,6 +813,25 @@ def : Pat <
   (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
 >;

+def : Pat <
+  (fneg v2f16:$src),
+  (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), $src)
+>;
+
+def : Pat <
+  (fabs v2f16:$src),
+  (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), $src)
+>;
+
+// This is really (fneg (fabs v2f16:$src))
+//
+// fabs is not reported as free because there is modifier for it in
+// VOP3P instructions, so it is turned into the bit op.
+def : Pat <
+  (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))),
+  (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
+>;
+
 /********** ================== **********/
 /********** Immediate Patterns **********/
 /********** ================== **********/
@@ -759,27 +899,6 @@ def : Pat <
 def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;

 def : Pat <
-  (int_AMDGPU_cube v4f32:$src),
-  (REG_SEQUENCE VReg_128,
-    (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub0,
-    (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub1,
-    (V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub2,
-    (V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub3)
->;
-
-def : Pat <
   (i32 (sext i1:$src0)),
   (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
 >;
@@ -810,6 +929,14 @@ def : UMad24Pat<V_MAD_U32_U24>;
 defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
 def : ROTRPattern <V_ALIGNBIT_B32>;

+def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
+          (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
+def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
+          (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
 /********** ====================== **********/
 /**********   Indirect addressing  **********/
 /********** ====================== **********/
@@ -933,7 +1060,7 @@ def : Pat <

 class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : Pat <
   (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
-  (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
+  (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
 >;

 def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
@@ -985,6 +1112,11 @@ def : Pat <
 //===----------------------------------------------------------------------===//
 // Miscellaneous Patterns
 //===----------------------------------------------------------------------===//
+def : Pat <
+  (i32 (AMDGPUfp16_zext f16:$src)),
+  (COPY $src)
+>;
+

 def : Pat <
   (i32 (trunc i64:$a)),
@@ -1028,24 +1160,72 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
 defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
 // FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;

-def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+def : Pat<
+  (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
+  (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src, 0, 0)
+>;

 def : Pat<
-  (fcanonicalize f16:$src),
-  (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0)
+  (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
+  (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
 >;

 def : Pat<
-  (fcanonicalize f32:$src),
-  (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
+  (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
+  (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src, 0, 0)
 >;

 def : Pat<
-  (fcanonicalize f64:$src),
-  (V_MUL_F64 0, CONST.FP64_ONE, 0, $src, 0, 0)
+  (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
+  (V_PK_MUL_F16 SRCMODS.OP_SEL_1, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
+>;
+
+
+// Allow integer inputs
+class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : Pat<
+  (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
+  (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
 >;

+def : ExpPattern<AMDGPUexport, i32, EXP>;
+def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
+
+def : Pat <
+  (v2i16 (build_vector i16:$src0, i16:$src1)),
+  (v2i16 (S_PACK_LL_B32_B16 $src0, $src1))
+>;
+
+// With multiple uses of the shift, this will duplicate the shift and
+// increase register pressure.
+def : Pat <
+  (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
+  (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1))
+>;
+
+def : Pat <
+  (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))),
+                       (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
+  (v2i16 (S_PACK_HH_B32_B16 $src0, $src1))
+>;
+
+// TODO: Should source modifiers be matched to v_pack_b32_f16?
+def : Pat <
+  (v2f16 (build_vector f16:$src0, f16:$src1)),
+  (v2f16 (S_PACK_LL_B32_B16 $src0, $src1))
+>;
+
+// def : Pat <
+//   (v2f16 (scalar_to_vector f16:$src0)),
+//   (COPY $src0)
+// >;
+
+// def : Pat <
+//   (v2i16 (scalar_to_vector i16:$src0)),
+//   (COPY $src0)
+// >;
+
 //===----------------------------------------------------------------------===//
 // Fract Patterns
 //===----------------------------------------------------------------------===//
@@ -1083,11 +1263,39 @@ def : Pat <
 // Miscellaneous Optimization Patterns
 //===----------------------------------------------------------------------===//

+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// TODO: Also do for 64-bit.
+def : Pat<
+  (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+  (S_SUB_I32 $src0, NegSubInlineConst32:$src1)
+>;
+
 def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;

 def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
 def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;

+// This matches 16 permutations of
+// max(min(x, y), min(max(x, y), z))
+class FPMed3Pat<ValueType vt,
+                Instruction med3Inst> : Pat<
+  (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                           (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+           (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+                                           (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+                           (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
+  (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : FPMed3Pat<f32, V_MED3_F32>;
+
+let Predicates = [isGFX9] in {
+def : FPMed3Pat<f16, V_MED3_F16>;
+def : IntMed3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
+def : IntMed3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+} // End Predicates = [isGFX9]
+
 //============================================================================//
 // Assembler aliases
 //============================================================================//
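Note on the clamp change above: AMDGPUclamp(x) is now selected as a self-max with the VOP3 clamp bit set, instead of the old V_ADD_F32_e64-with-zero form. Since instantiating the ClampPat class is plain substitution, "def : ClampPat<V_MAX_F32_e64, f32>" is equivalent to the hand-written pattern below (an illustrative expansion, not a line from this patch):

// Illustrative expansion of ClampPat<V_MAX_F32_e64, f32>, not part of the
// patch. clamp(x) selects to "v_max_f32_e64 dst, x, x clamp": the source
// (with its modifiers) is repeated in both operands, and the result is
// clamped to [0.0, 1.0] by DSTCLAMP.ENABLE.
def : Pat <
  (f32 (AMDGPUclamp (VOP3Mods f32:$src0, i32:$src0_modifiers))),
  (V_MAX_F32_e64 i32:$src0_modifiers, f32:$src0,
                 i32:$src0_modifiers, f32:$src0,
                 DSTCLAMP.ENABLE, DSTOMOD.NONE)
>;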