diff options
Diffstat (limited to 'contrib/llvm/lib/Target/R600/SIInstructions.td')
-rw-r--r-- | contrib/llvm/lib/Target/R600/SIInstructions.td | 798 |
1 files changed, 618 insertions, 180 deletions
diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td index 0d50c5d..76f05eb 100644 --- a/contrib/llvm/lib/Target/R600/SIInstructions.td +++ b/contrib/llvm/lib/Target/R600/SIInstructions.td @@ -22,8 +22,10 @@ def InterpSlot : Operand<i32> { let PrintMethod = "printInterpSlot"; } -def isSI : Predicate<"Subtarget.device()" - "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">; +def isSI : Predicate<"Subtarget.getGeneration() " + ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; + +def WAIT_FLAG : InstFlag<"printWaitFlag">; let Predicates = [isSI] in { @@ -126,8 +128,11 @@ def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; } // End isCompare = 1 -def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; -def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>; +let Defs = [SCC], isCommutable = 1 in { + def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; + def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>; +} + //def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>; def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>; def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>; @@ -138,19 +143,19 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; let isCompare = 1 in { defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">; -defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>; -defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>; -defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>; -defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>; -defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>; -defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>; -defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">; -defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">; +defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_OLT>; +defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_OEQ>; +defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_OLE>; +defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_OGT>; +defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32">; +defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_OGE>; +defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", f32, COND_O>; +defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", f32, COND_UO>; defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">; defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">; defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">; defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">; -defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>; +defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>; defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">; defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">; @@ -176,19 +181,19 @@ defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">; } // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; -defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">; -defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">; -defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">; -defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">; +defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>; +defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_OEQ>; +defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_OLE>; +defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_OGT>; defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">; -defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">; -defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">; -defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">; +defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_OGE>; +defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", f64, COND_O>; +defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", f64, COND_UO>; defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">; defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">; defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">; defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">; -defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">; +defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>; defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; @@ -290,12 +295,12 @@ defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">; } // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">; -defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>; +defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_SLT>; defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>; -defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>; -defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>; +defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_SLE>; +defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_SGT>; defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>; -defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>; +defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>; defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">; let hasSideEffects = 1, Defs = [EXEC] in { @@ -312,12 +317,12 @@ defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">; } // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">; -defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">; -defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">; -defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">; -defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">; -defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">; -defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">; +defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>; +defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", i64, COND_EQ>; +defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", i64, COND_SLE>; +defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", i64, COND_SGT>; +defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>; +defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>; defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">; let hasSideEffects = 1, Defs = [EXEC] in { @@ -334,12 +339,12 @@ defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">; } // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">; -defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">; -defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">; -defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">; -defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">; -defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">; -defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">; +defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>; +defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", i32, COND_EQ>; +defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", i32, COND_ULE>; +defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", i32, COND_UGT>; +defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>; +defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>; defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">; let hasSideEffects = 1, Defs = [EXEC] in { @@ -356,12 +361,12 @@ defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">; } // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">; -defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">; -defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">; -defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">; -defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">; -defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">; -defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">; +defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>; +defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", i64, COND_EQ>; +defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", i64, COND_ULE>; +defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", i64, COND_UGT>; +defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>; +defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>; defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">; let hasSideEffects = 1, Defs = [EXEC] in { @@ -391,32 +396,52 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1 +def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>; +def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>; +def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; +def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; +def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; +def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; +def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>; +def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>; +def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>; +def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>; + //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; //def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; -def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>; +defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>; //def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>; //def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>; //def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>; //def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>; -//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>; -//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; -//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; -//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; -def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; -def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; -def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; -//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; -//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; +defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <0x00000008, "BUFFER_LOAD_UBYTE", VReg_32>; +defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <0x00000009, "BUFFER_LOAD_SBYTE", VReg_32>; +defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <0x0000000a, "BUFFER_LOAD_USHORT", VReg_32>; +defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32>; +defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; + +def BUFFER_STORE_BYTE : MUBUF_Store_Helper < + 0x00000018, "BUFFER_STORE_BYTE", VReg_32 +>; + +def BUFFER_STORE_SHORT : MUBUF_Store_Helper < + 0x0000001a, "BUFFER_STORE_SHORT", VReg_32 +>; def BUFFER_STORE_DWORD : MUBUF_Store_Helper < - 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32 + 0x0000001c, "BUFFER_STORE_DWORD", VReg_32 >; def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < - 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64 + 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64 +>; + +def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < + 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128 >; -//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>; //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; //def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>; @@ -457,21 +482,24 @@ def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>; //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>; def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; -//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>; -//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>; -//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; -//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; +def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>; +def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>; +def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>; +def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>; let mayLoad = 1 in { -defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>; +// We are using the SGPR_32 and not the SReg_32 register class for 32-bit +// SMRD instructions, because the SGPR_32 register class does not include M0 +// and writing to M0 from an SMRD instruction will hang the GPU. +defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>; defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>; defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; defm S_BUFFER_LOAD_DWORD : SMRD_Helper < - 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32 + 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32 >; defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < @@ -494,8 +522,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; -//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; -def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; +defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">; +defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; //def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; @@ -504,7 +532,7 @@ def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>; //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; -def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">; +defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "IMAGE_GET_RESINFO">; //def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; //def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; //def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; @@ -522,20 +550,20 @@ def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">; //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; -def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">; +defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">; //def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; -def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">; +defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">; //def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; -def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">; -def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">; +defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">; +defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">; +defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">; //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; -//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; +defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">; //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; -def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; +defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">; +defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; @@ -597,15 +625,21 @@ defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; } // End neverHasSideEffects = 1, isMoveImm = 1 defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; -//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; -//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; +defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64", + [(set i32:$dst, (fp_to_sint f64:$src0))] +>; +defm V_CVT_F64_I32 : VOP1_64_32 <0x00000004, "V_CVT_F64_I32", + [(set f64:$dst, (sint_to_fp i32:$src0))] +>; defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", [(set f32:$dst, (sint_to_fp i32:$src0))] >; defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", [(set f32:$dst, (uint_to_fp i32:$src0))] >; -defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; +defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", + [(set i32:$dst, (fp_to_uint f32:$src0))] +>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", [(set i32:$dst, (fp_to_sint f32:$src0))] >; @@ -615,8 +649,12 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; //defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; //defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>; //defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>; -//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>; -//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>; +defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64", + [(set f32:$dst, (fround f64:$src0))] +>; +defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32", + [(set f64:$dst, (fextend f32:$src0))] +>; //defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>; //defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; //defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; @@ -657,12 +695,18 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 < [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; -defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; +defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", + [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] +>; defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; -defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>; -defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>; +defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", + [(set f32:$dst, (fsqrt f32:$src0))] +>; +defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", + [(set f64:$dst, (fsqrt f64:$src0))] +>; defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>; defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>; defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>; @@ -768,9 +812,18 @@ def S_CBRANCH_EXECNZ : SOPP < } // End isBranch = 1 } // End isTerminator = 1 -//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>; let hasSideEffects = 1 in { -def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", +def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER", + [(int_AMDGPU_barrier_local)] +> { + let SIMM16 = 0; + let isBarrier = 1; + let hasCtrlDep = 1; + let mayLoad = 1; + let mayStore = 1; +} + +def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16", [] >; } // End hasSideEffects @@ -806,6 +859,23 @@ def : Pat < (V_CNDMASK_B32_e64 $src0, $src1, $src2) >; +def : Pat < + (i32 (trunc i64:$val)), + (EXTRACT_SUBREG $val, sub0) +>; + +//use two V_CNDMASK_B32_e64 instructions for f64 +def : Pat < + (f64 (select i1:$src2, f64:$src1, f64:$src0)), + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub0), + (EXTRACT_SUBREG $src1, sub0), + $src2), sub0), + (V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub1), + (EXTRACT_SUBREG $src1, sub1), + $src2), sub1) +>; + defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; @@ -833,14 +903,16 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", [(set f32:$dst, (fmul f32:$src0, f32:$src1))] >; -} // End isCommutable = 1 -//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>; +defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", + [(set i32:$dst, (mul I24:$src0, I24:$src1))] +>; //defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; -//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>; +defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", + [(set i32:$dst, (mul U24:$src0, U24:$src1))] +>; //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; -let isCommutable = 1 in { defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))] @@ -875,9 +947,13 @@ defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", >; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; +let hasPostISelHook = 1 in { + defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", [(set i32:$dst, (shl i32:$src0, i32:$src1))] >; + +} defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", @@ -897,20 +973,17 @@ defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; -//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; -//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; +defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; +defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC -defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", - [(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] ->; - -defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", - [(set i32:$dst, (sub i32:$src0, i32:$src1))] ->; +// No patterns so that the scalar instructions are always selected. +// The scalar versions will be replaced with vector when needed later. +defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>; +defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>; defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; -let Uses = [VCC] in { // Carry-out comes from VCC +let Uses = [VCC] in { // Carry-in comes from VCC defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; @@ -948,8 +1021,12 @@ let neverHasSideEffects = 1 in { def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>; -//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>; -//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>; +def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", + [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))] +>; +def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", + [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))] +>; } // End neverHasSideEffects def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; @@ -960,10 +1037,16 @@ def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; defm : BFIPatterns <V_BFI_B32>; -def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>; -def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>; +def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", + [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))] +>; +def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", + [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))] +>; //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; +def : ROTRPattern <V_ALIGNBIT_B32>; + def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>; def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; ////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>; @@ -982,13 +1065,36 @@ def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; ////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; -def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>; -def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>; -def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>; + +def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64", + [(set i64:$dst, (shl i64:$src0, i32:$src1))] +>; +def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64", + [(set i64:$dst, (srl i64:$src0, i32:$src1))] +>; +def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", + [(set i64:$dst, (sra i64:$src0, i32:$src1))] +>; + +let isCommutable = 1 in { + def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; + +} // isCommutable = 1 + +def : Pat < + (fadd f64:$src0, f64:$src1), + (V_ADD_F64 $src0, $src1, (i64 0)) +>; + +def : Pat < + (fmul f64:$src0, f64:$src1), + (V_MUL_F64 $src0, $src1, (i64 0)) +>; + def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; let isCommutable = 1 in { @@ -1023,12 +1129,31 @@ def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; + +let Defs = [SCC] in { // Carry out goes to SCC +let isCommutable = 1 in { def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; +def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", + [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))] +>; +} // End isCommutable = 1 + def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>; -def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>; -def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>; -def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>; -def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>; +def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", + [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))] +>; + +let Uses = [SCC] in { // Carry in comes from SCC +let isCommutable = 1 in { +def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", + [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; +} // End isCommutable = 1 + +def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", + [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; +} // End Uses = [SCC] +} // End Defs = [SCC] + def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>; def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>; def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>; @@ -1060,7 +1185,9 @@ def : Pat < (S_OR_B64 $src0, $src1) >; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; -def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; +def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", + [(set i1:$dst, (xor i1:$src0, i1:$src1))] +>; def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>; def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>; @@ -1071,12 +1198,31 @@ def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>; def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>; def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>; def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>; -def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>; -def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>; -def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>; -def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>; -def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>; -def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>; + +// Use added complexity so these patterns are preferred to the VALU patterns. +let AddedComplexity = 1 in { + +def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", + [(set i32:$dst, (shl i32:$src0, i32:$src1))] +>; +def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64", + [(set i64:$dst, (shl i64:$src0, i32:$src1))] +>; +def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", + [(set i32:$dst, (srl i32:$src0, i32:$src1))] +>; +def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64", + [(set i64:$dst, (srl i64:$src0, i32:$src1))] +>; +def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", + [(set i32:$dst, (sra i32:$src0, i32:$src1))] +>; +def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64", + [(set i64:$dst, (sra i64:$src0, i32:$src1))] +>; + +} // End AddedComplexity = 1 + def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>; def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>; def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>; @@ -1096,7 +1242,7 @@ def LOAD_CONST : AMDGPUShaderInst < [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] >; -// SI Psuedo instructions. These are used by the CFG structurizer pass +// SI pseudo instructions. These are used by the CFG structurizer pass // and should be lowered to ISA instructions prior to codegen. let mayLoad = 1, mayStore = 1, hasSideEffects = 1, @@ -1169,6 +1315,36 @@ def SI_KILL : InstSI < let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { +//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>; + +let UseNamedOperandTable = 1 in { + +def SI_RegisterLoad : AMDGPUShaderInst < + (outs VReg_32:$dst, SReg_64:$temp), + (ins FRAMEri64:$addr, i32imm:$chan), + "", [] +> { + let isRegisterLoad = 1; + let mayLoad = 1; +} + +class SIRegStore<dag outs> : AMDGPUShaderInst < + outs, + (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan), + "", [] +> { + let isRegisterStore = 1; + let mayStore = 1; +} + +let usesCustomInserter = 1 in { +def SI_RegisterStorePseudo : SIRegStore<(outs)>; +} // End usesCustomInserter = 1 +def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>; + + +} // End UseNamedOperandTable = 1 + def SI_INDIRECT_SRC : InstSI < (outs VReg_32:$dst, SReg_64:$temp), (ins unknown:$src, VSrc_32:$idx, i32imm:$off), @@ -1185,6 +1361,7 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI < let Constraints = "$src = $dst"; } +def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>; def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>; def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>; def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>; @@ -1192,6 +1369,25 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; } // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] +let usesCustomInserter = 1 in { + +// This pseudo instruction takes a pointer as input and outputs a resource +// constant that can be used with the ADDR64 MUBUF instructions. +def SI_ADDR64_RSRC : InstSI < + (outs SReg_128:$srsrc), + (ins SReg_64:$ptr), + "", [] +>; + +def V_SUB_F64 : InstSI < + (outs VReg_64:$dst), + (ins VReg_64:$src0, VReg_64:$src1), + "V_SUB_F64 $dst, $src0, $src1", + [] +>; + +} // end usesCustomInserter + } // end IsCodeGenOnly, isPseudo def : Pat< @@ -1206,10 +1402,8 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< - (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset, - i32:$buf_idx_vgpr), - (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, - $buf_idx_vgpr, $tlst, 0, 0, 0) + (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), + (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset) >; /* int_SI_export */ @@ -1220,66 +1414,94 @@ def : Pat < $src0, $src1, $src2, $src3) >; +def : Pat < + (f64 (fsub f64:$src0, f64:$src1)), + (V_SUB_F64 $src0, $src1) +>; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ -/* int_SI_sample for simple 1D texture lookup */ +/* SIsample for simple 1D texture lookup */ def : Pat < - (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm), - (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) + (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm), + (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm), +class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT), +class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT), (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY), +class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleShadowPattern<Intrinsic name, MIMG opcode, +class SampleShadowPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW), + (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleShadowArrayPattern<Intrinsic name, MIMG opcode, +class SampleShadowArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY), + (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -/* int_SI_sample* for texture lookups consuming more address parameters */ -multiclass SamplePatterns<ValueType addr_type> { - def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; - def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; - def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; - def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>; - def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>; - - def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>; - def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>; - def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>; - def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>; - - def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>; - def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>; - def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>; - def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>; +/* SIsample* for texture lookups consuming more address parameters */ +multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l, + MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b, +MIMG sample_d, MIMG sample_c_d, ValueType addr_type> { + def : SamplePattern <SIsample, sample, addr_type>; + def : SampleRectPattern <SIsample, sample, addr_type>; + def : SampleArrayPattern <SIsample, sample, addr_type>; + def : SampleShadowPattern <SIsample, sample_c, addr_type>; + def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>; + + def : SamplePattern <SIsamplel, sample_l, addr_type>; + def : SampleArrayPattern <SIsamplel, sample_l, addr_type>; + def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>; + def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>; + + def : SamplePattern <SIsampleb, sample_b, addr_type>; + def : SampleArrayPattern <SIsampleb, sample_b, addr_type>; + def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>; + def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>; + + def : SamplePattern <SIsampled, sample_d, addr_type>; + def : SampleArrayPattern <SIsampled, sample_d, addr_type>; + def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>; + def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>; } -defm : SamplePatterns<v2i32>; -defm : SamplePatterns<v4i32>; -defm : SamplePatterns<v8i32>; -defm : SamplePatterns<v16i32>; +defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2, + IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2, + IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2, + IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2, + v2i32>; +defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4, + IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4, + IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4, + IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4, + v4i32>; +defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8, + IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8, + IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8, + IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8, + v8i32>; +defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16, + IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16, + IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16, + IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16, + v16i32>; /* int_SI_imageload for texture fetches consuming varying address parameters */ class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < @@ -1292,23 +1514,46 @@ class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc) >; -multiclass ImageLoadPatterns<ValueType addr_type> { - def : ImageLoadPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>; - def : ImageLoadArrayPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>; +class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < + (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA), + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc) +>; + +class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < + (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA), + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc) +>; + +multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> { + def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>; + def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>; +} + +multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> { + def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>; + def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>; } -defm : ImageLoadPatterns<v2i32>; -defm : ImageLoadPatterns<v4i32>; +defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>; +defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>; + +defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>; +defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>; /* Image resource information */ def : Pat < (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm), - (IMAGE_GET_RESINFO 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) + (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) >; def : Pat < (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY), - (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) + (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) +>; + +def : Pat < + (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA), + (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) >; /********** ============================================ **********/ @@ -1379,22 +1624,30 @@ foreach Index = 0-15 in { >; } -def : Vector1_Build <v1i32, i32, VReg_32>; -def : Vector2_Build <v2i32, i32>; -def : Vector2_Build <v2f32, f32>; -def : Vector4_Build <v4i32, i32>; -def : Vector4_Build <v4f32, f32>; -def : Vector8_Build <v8i32, i32>; -def : Vector8_Build <v8f32, f32>; -def : Vector16_Build <v16i32, i32>; -def : Vector16_Build <v16f32, f32>; - def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; def : BitConvert <f32, i32, SReg_32>; def : BitConvert <f32, i32, VReg_32>; +def : BitConvert <i64, f64, VReg_64>; + +def : BitConvert <f64, i64, VReg_64>; + +def : BitConvert <v2f32, v2i32, VReg_64>; +def : BitConvert <v2i32, v2f32, VReg_64>; +def : BitConvert <v2i32, i64, VReg_64>; + +def : BitConvert <v4f32, v4i32, VReg_128>; +def : BitConvert <v4i32, v4f32, VReg_128>; +def : BitConvert <v4i32, i128, VReg_128>; +def : BitConvert <i128, v4i32, VReg_128>; + +def : BitConvert <v8i32, v32i8, SReg_256>; +def : BitConvert <v32i8, v8i32, SReg_256>; +def : BitConvert <v8i32, v32i8, VReg_256>; +def : BitConvert <v32i8, v8i32, VReg_256>; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ @@ -1422,6 +1675,16 @@ def : Pat < /********** ================== **********/ def : Pat < + (SGPRImm<(i32 imm)>:$imm), + (S_MOV_B32 imm:$imm) +>; + +def : Pat < + (SGPRImm<(f32 fpimm)>:$imm), + (S_MOV_B32 fpimm:$imm) +>; + +def : Pat < (i32 imm:$imm), (V_MOV_B32_e32 imm:$imm) >; @@ -1449,6 +1712,13 @@ def : Pat < (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1) >; +def : Pat < + (f64 fpimm:$imm), + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (V_MOV_B32_e32 (f32 (LO32f fpimm:$imm))), sub0), + (V_MOV_B32_e32 (f32 (HI32f fpimm:$imm))), sub1) +>; + /********** ===================== **********/ /********** Interpolation Paterns **********/ /********** ===================== **********/ @@ -1483,6 +1753,11 @@ def : Pat< (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1)) >; +def : Pat< + (fdiv f64:$src0, f64:$src1), + (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0)) +>; + def : Pat < (fcos f32:$src0), (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) @@ -1521,20 +1796,20 @@ def : Pat < // 1. Offset as 8bit DWORD immediate def : Pat < - (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset), + (SIload_constant i128:$sbase, IMM8bitDWORD:$offset), (S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset) >; // 2. Offset loaded in an 32bit SGPR def : Pat < - (int_SI_load_const v16i8:$sbase, imm:$offset), + (SIload_constant i128:$sbase, imm:$offset), (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; // 3. Offset in an 32Bit VGPR def : Pat < - (int_SI_load_const v16i8:$sbase, i32:$voff), - (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, $voff, $sbase, 0, 0, 0) + (SIload_constant i128:$sbase, i32:$voff), + (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff) >; // The multiplication scales from [0,1] to the unsigned integer range @@ -1545,6 +1820,12 @@ def : Pat < (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0)))) >; +def : Pat < + (int_SI_tid), + (V_MBCNT_HI_U32_B32_e32 0xffffffff, + (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0, 0, 0)) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ @@ -1554,6 +1835,40 @@ def : Pat < (V_MAD_F32 $src0, $src1, $src2) >; +/********** ======================= **********/ +/********** Load/Store Patterns **********/ +/********** ======================= **********/ + +class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat < + (frag i32:$src0), + (vt (inst 0, $src0, $src0, $src0, 0, 0)) +>; + +def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>; +def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>; +def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>; +def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>; +def : DSReadPat <DS_READ_B32, i32, local_load>; +def : Pat < + (local_load i32:$src0), + (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0)) +>; + +class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat < + (frag i32:$src1, i32:$src0), + (inst 0, $src0, $src1, $src1, 0, 0) +>; + +def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>; +def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>; +def : DSWritePat <DS_WRITE_B32, i32, local_store>; + +def : Pat <(atomic_load_add_local i32:$ptr, i32:$val), + (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>; + +def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val), + (DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>; + /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ @@ -1581,8 +1896,100 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>; defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; -defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>; +defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>; +defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>; +defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>; +defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; +defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>; +defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>; + +//===----------------------------------------------------------------------===// +// MUBUF Patterns +//===----------------------------------------------------------------------===// + +multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt, + PatFrag global_ld, PatFrag constant_ld> { + def : Pat < + (vt (global_ld (add i64:$ptr, (i64 IMM12bit:$offset)))), + (Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset)) + >; + + def : Pat < + (vt (global_ld i64:$ptr)), + (Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, 0) + >; + + def : Pat < + (vt (global_ld (add i64:$ptr, i64:$offset))), + (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0) + >; + + def : Pat < + (vt (constant_ld (add i64:$ptr, i64:$offset))), + (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0) + >; +} + +defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, + sextloadi8_global, sextloadi8_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, + az_extloadi8_global, az_extloadi8_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, + sextloadi16_global, sextloadi16_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, + az_extloadi16_global, az_extloadi16_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, + global_load, constant_load>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64, + global_load, constant_load>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64, + az_extloadi32_global, az_extloadi32_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, + global_load, constant_load>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, + global_load, constant_load>; + +multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> { + + def : Pat < + (st vt:$value, i64:$ptr), + (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0) + >; + + def : Pat < + (st vt:$value, (add i64:$ptr, i64:$offset)), + (Instr $value, (SI_ADDR64_RSRC $ptr), $offset, 0) + >; +} + +defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE, i32, truncstorei8_global>; +defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT, i32, truncstorei16_global>; +defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32, global_store>; +defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>; +defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>; +defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>; + +//===----------------------------------------------------------------------===// +// MTBUF Patterns +//===----------------------------------------------------------------------===// + +// TBUFFER_STORE_FORMAT_*, addr64=0 +class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat< + (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr, + i32:$soffset, imm:$inst_offset, imm:$dfmt, + imm:$nfmt, imm:$offen, imm:$idxen, + imm:$glc, imm:$slc, imm:$tfe), + (opcode + $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), + (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, + (as_i1imm $slc), (as_i1imm $tfe), $soffset) +>; + +def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>; +def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>; +def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>; +def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>; /********** ====================== **********/ /********** Indirect adressing **********/ @@ -1592,25 +1999,25 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> { // 1. Extract with offset def : Pat< - (vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))), + (vector_extract vt:$vec, (add i32:$idx, imm:$off)), (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off)) >; // 2. Extract without offset def : Pat< - (vector_extract vt:$vec, (i64 (zext i32:$idx))), + (vector_extract vt:$vec, i32:$idx), (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) >; // 3. Insert with offset def : Pat< - (vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))), + (vector_insert vt:$vec, f32:$val, (add i32:$idx, imm:$off)), (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val) >; // 4. Insert without offset def : Pat< - (vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))), + (vector_insert vt:$vec, f32:$val, i32:$idx), (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val) >; } @@ -1634,6 +2041,37 @@ def : Pat< (V_CMP_U_F32_e64 $src0, $src1) >; +//===----------------------------------------------------------------------===// +// Miscellaneous Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (i64 (trunc i128:$x)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 (EXTRACT_SUBREG $x, sub0)), sub0), + (i32 (EXTRACT_SUBREG $x, sub1)), sub1) +>; + +def : Pat < + (i32 (trunc i64:$a)), + (EXTRACT_SUBREG $a, sub0) +>; + +// V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector +// case, the sgpr-copies pass will fix this to use the vector version. +def : Pat < + (i32 (addc i32:$src0, i32:$src1)), + (S_ADD_I32 $src0, $src1) +>; + +def : Pat < + (or i64:$a, i64:$b), + (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), + (V_OR_B32_e32 (EXTRACT_SUBREG $a, sub0), (EXTRACT_SUBREG $b, sub0)), sub0), + (V_OR_B32_e32 (EXTRACT_SUBREG $a, sub1), (EXTRACT_SUBREG $b, sub1)), sub1) +>; + //============================================================================// // Miscellaneous Optimization Patterns //============================================================================// |