diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86InstrXOP.td')
-rw-r--r-- | contrib/llvm/lib/Target/X86/X86InstrXOP.td | 203 |
1 files changed, 76 insertions, 127 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td index 2b296e1..5dde2d0 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td +++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td @@ -111,7 +111,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - XOP_4V, VEX_W, Sched<[WriteVarVecShift]>; + XOP_4V, VEX_W, Sched<[WriteVarVecShift]>, FoldGenData<NAME#rr>; } let ExeDomain = SSEPackedInt in { @@ -183,6 +183,27 @@ let ExeDomain = SSEPackedInt in { defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>; } +// IFMA patterns - for cases where we can safely ignore the overflow bits from +// the multiply or easily match with existing intrinsics. +let Predicates = [HasXOP] in { + def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)), + (v8i16 VR128:$src3))), + (VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)), + (v4i32 VR128:$src3))), + (VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(v2i64 (add (X86pmuldq (X86PShufd (v4i32 VR128:$src1), (i8 -11)), + (X86PShufd (v4i32 VR128:$src2), (i8 -11))), + (v2i64 VR128:$src3))), + (VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(v2i64 (add (X86pmuldq (v4i32 VR128:$src1), (v4i32 VR128:$src2)), + (v2i64 VR128:$src3))), + (VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)), + (v4i32 VR128:$src3))), + (VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>; +} + // Instruction where second source can be memory, third must be imm8 multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128> { let isCommutable = 1 in @@ -261,7 +282,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData<NAME#rrr>; } let ExeDomain = SSEPackedInt in { @@ -269,159 +290,87 @@ let ExeDomain = SSEPackedInt in { } // Instruction where either second or third source can be memory -multiclass xop4op_int<bits<8> opc, string OpcodeStr, - Intrinsic Int128, Intrinsic Int256> { - // 128-bit Instruction - def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), +multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, ValueType VT> { + def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, - XOP_4V; - def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i128mem:$src3), + [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1), + (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V; + def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst), + (ins RC:$src1, RC:$src2, x86memop:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int128 VR128:$src1, VR128:$src2, - (bitconvert (loadv2i64 addr:$src3))))]>, + [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1), + (X86andnp (load addr:$src3), RC:$src2))))]>, XOP_4V, VEX_W; - def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, VR128:$src3), + def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int128 VR128:$src1, (bitconvert (loadv2i64 addr:$src2)), - VR128:$src3))]>, + [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1), + (X86andnp RC:$src3, (load addr:$src2)))))]>, XOP_4V; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in - def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; - - // 256-bit Instruction - def rrrY : IXOPi8Reg<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, - XOP_4V, VEX_L; - def rrmY : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, i256mem:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR256:$dst, - (Int256 VR256:$src1, VR256:$src2, - (bitconvert (loadv4i64 addr:$src3))))]>, - XOP_4V, VEX_W, VEX_L; - def rmrY : IXOPi8Reg<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, f256mem:$src2, VR256:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR256:$dst, - (Int256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2)), - VR256:$src3))]>, - XOP_4V, VEX_L; - // For disassembler - let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in - def rrrY_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), + def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W, VEX_L; + []>, XOP_4V, VEX_W, FoldGenData<NAME#rrr>; } let ExeDomain = SSEPackedInt in { - defm VPCMOV : xop4op_int<0xA2, "vpcmov", - int_x86_xop_vpcmov, int_x86_xop_vpcmov_256>; + defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64>; + defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64>, VEX_L; } -let Predicates = [HasXOP] in { - def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1), - (X86andnp VR128:$src3, VR128:$src2))), - (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>; - - def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1), - (X86andnp VR256:$src3, VR256:$src2))), - (VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>; -} - -multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType vt128, ValueType vt256, - ValueType id128, ValueType id256, - PatFrag ld_128, PatFrag ld_256> { - def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4), +multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC, + X86MemOperand intmemop, X86MemOperand fpmemop, + ValueType VT, PatFrag FPLdFrag, + PatFrag IntLdFrag> { + def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - [(set VR128:$dst, - (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), - (id128 VR128:$src3), (i8 imm:$src4))))]>; - def rm : IXOP5<opc, MRMSrcMemOp4, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4), + [(set RC:$dst, + (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>; + def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst), + (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - [(set VR128:$dst, - (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), - (id128 (bitconvert (loadv2i64 addr:$src3))), - (i8 imm:$src4))))]>, - VEX_W; - def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4), + [(set RC:$dst, + (VT (X86vpermil2 RC:$src1, RC:$src2, + (bitconvert (IntLdFrag addr:$src3)), + (i8 imm:$src4))))]>, VEX_W; + def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - [(set VR128:$dst, - (vt128 (OpNode (vt128 VR128:$src1), - (vt128 (bitconvert (ld_128 addr:$src2))), - (id128 VR128:$src3), (i8 imm:$src4))))]>; + [(set RC:$dst, + (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2), + RC:$src3, (i8 imm:$src4))))]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in - def rr_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4), - !strconcat(OpcodeStr, - "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W; - - def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4), + def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - [(set VR256:$dst, - (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2), - (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L; - def rmY : IXOP5<opc, MRMSrcMemOp4, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4), - !strconcat(OpcodeStr, - "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - [(set VR256:$dst, - (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2), - (id256 (bitconvert (loadv4i64 addr:$src3))), - (i8 imm:$src4))))]>, VEX_W, VEX_L; - def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4), - !strconcat(OpcodeStr, - "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - [(set VR256:$dst, - (vt256 (OpNode (vt256 VR256:$src1), - (vt256 (bitconvert (ld_256 addr:$src2))), - (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L; - // For disassembler - let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in - def rrY_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4), - !strconcat(OpcodeStr, - "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W, VEX_L; + []>, VEX_W, FoldGenData<NAME#rr>; } -let ExeDomain = SSEPackedDouble in - defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", X86vpermil2, v2f64, v4f64, - v2i64, v4i64, loadv2f64, loadv4f64>; +let ExeDomain = SSEPackedDouble in { + defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem, + v2f64, loadv2f64, loadv2i64>; + defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem, + v4f64, loadv4f64, loadv4i64>, VEX_L; +} -let ExeDomain = SSEPackedSingle in - defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", X86vpermil2, v4f32, v8f32, - v4i32, v8i32, loadv4f32, loadv8f32>; +let ExeDomain = SSEPackedSingle in { + defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem, + v4f32, loadv4f32, loadv2i64>; + defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem, + v8f32, loadv8f32, loadv4i64>, VEX_L; +} |