summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/X86/X86InstrXOP.td
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86InstrXOP.td')
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td203
1 files changed, 76 insertions, 127 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
index 2b296e1..5dde2d0 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -111,7 +111,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>,
- XOP_4V, VEX_W, Sched<[WriteVarVecShift]>;
+ XOP_4V, VEX_W, Sched<[WriteVarVecShift]>, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedInt in {
@@ -183,6 +183,27 @@ let ExeDomain = SSEPackedInt in {
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
}
+// IFMA patterns - for cases where we can safely ignore the overflow bits from
+// the multiply or easily match with existing intrinsics.
+let Predicates = [HasXOP] in {
+ def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
+ (v8i16 VR128:$src3))),
+ (VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
+ (v4i32 VR128:$src3))),
+ (VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v2i64 (add (X86pmuldq (X86PShufd (v4i32 VR128:$src1), (i8 -11)),
+ (X86PShufd (v4i32 VR128:$src2), (i8 -11))),
+ (v2i64 VR128:$src3))),
+ (VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v2i64 (add (X86pmuldq (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
+ (v2i64 VR128:$src3))),
+ (VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
+ (v4i32 VR128:$src3))),
+ (VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+}
+
// Instruction where second source can be memory, third must be imm8
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128> {
let isCommutable = 1 in
@@ -261,7 +282,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W;
+ []>, XOP_4V, VEX_W, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
@@ -269,159 +290,87 @@ let ExeDomain = SSEPackedInt in {
}
// Instruction where either second or third source can be memory
-multiclass xop4op_int<bits<8> opc, string OpcodeStr,
- Intrinsic Int128, Intrinsic Int256> {
- // 128-bit Instruction
- def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType VT> {
+ def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>,
- XOP_4V;
- def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
+ (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V;
+ def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int128 VR128:$src1, VR128:$src2,
- (bitconvert (loadv2i64 addr:$src3))))]>,
+ [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
+ (X86andnp (load addr:$src3), RC:$src2))))]>,
XOP_4V, VEX_W;
- def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int128 VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- VR128:$src3))]>,
+ [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
+ (X86andnp RC:$src3, (load addr:$src2)))))]>,
XOP_4V;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W;
-
- // 256-bit Instruction
- def rrrY : IXOPi8Reg<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>,
- XOP_4V, VEX_L;
- def rrmY : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i256mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, VR256:$src2,
- (bitconvert (loadv4i64 addr:$src3))))]>,
- XOP_4V, VEX_W, VEX_L;
- def rmrY : IXOPi8Reg<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2)),
- VR256:$src3))]>,
- XOP_4V, VEX_L;
- // For disassembler
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrrY_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W, VEX_L;
+ []>, XOP_4V, VEX_W, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
- defm VPCMOV : xop4op_int<0xA2, "vpcmov",
- int_x86_xop_vpcmov, int_x86_xop_vpcmov_256>;
+ defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64>;
+ defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64>, VEX_L;
}
-let Predicates = [HasXOP] in {
- def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
- (X86andnp VR128:$src3, VR128:$src2))),
- (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
-
- def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
- (X86andnp VR256:$src3, VR256:$src2))),
- (VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
-}
-
-multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType vt128, ValueType vt256,
- ValueType id128, ValueType id256,
- PatFrag ld_128, PatFrag ld_256> {
- def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
+multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand intmemop, X86MemOperand fpmemop,
+ ValueType VT, PatFrag FPLdFrag,
+ PatFrag IntLdFrag> {
+ def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (id128 VR128:$src3), (i8 imm:$src4))))]>;
- def rm : IXOP5<opc, MRMSrcMemOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>;
+ def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (id128 (bitconvert (loadv2i64 addr:$src3))),
- (i8 imm:$src4))))]>,
- VEX_W;
- def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, RC:$src2,
+ (bitconvert (IntLdFrag addr:$src3)),
+ (i8 imm:$src4))))]>, VEX_W;
+ def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (ld_128 addr:$src2))),
- (id128 VR128:$src3), (i8 imm:$src4))))]>;
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
+ RC:$src3, (i8 imm:$src4))))]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rr_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W;
-
- def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
+ def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
- (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- def rmY : IXOP5<opc, MRMSrcMemOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
- (id256 (bitconvert (loadv4i64 addr:$src3))),
- (i8 imm:$src4))))]>, VEX_W, VEX_L;
- def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1),
- (vt256 (bitconvert (ld_256 addr:$src2))),
- (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- // For disassembler
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrY_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, VEX_L;
+ []>, VEX_W, FoldGenData<NAME#rr>;
}
-let ExeDomain = SSEPackedDouble in
- defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", X86vpermil2, v2f64, v4f64,
- v2i64, v4i64, loadv2f64, loadv4f64>;
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
+ v2f64, loadv2f64, loadv2i64>;
+ defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
+ v4f64, loadv4f64, loadv4i64>, VEX_L;
+}
-let ExeDomain = SSEPackedSingle in
- defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", X86vpermil2, v4f32, v8f32,
- v4i32, v8i32, loadv4f32, loadv8f32>;
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
+ v4f32, loadv4f32, loadv2i64>;
+ defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
+ v8f32, loadv8f32, loadv4i64>, VEX_L;
+}
OpenPOWER on IntegriCloud