Diffstat (limited to 'lib/Target/X86')
 lib/Target/X86/X86.td                   |   8
 lib/Target/X86/X86ISelLowering.cpp      | 153
 lib/Target/X86/X86ISelLowering.h        |   8
 lib/Target/X86/X86InstrFragmentsSIMD.td |   5
 lib/Target/X86/X86InstrSSE.td           |   8
5 files changed, 154 insertions, 28 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6c1a816..18e6b7c 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -17,14 +17,14 @@ include "llvm/Target/Target.td"
 
 //===----------------------------------------------------------------------===//
-// X86 Subtarget state.
+// X86 Subtarget state
 //
 
 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
                                   "64-bit mode (x86_64)">;
 
 //===----------------------------------------------------------------------===//
-// X86 Subtarget features.
+// X86 Subtarget features
 //===----------------------------------------------------------------------===//
 
 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
@@ -97,7 +97,7 @@ def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
                                       [FeatureAVX, FeatureSSE4A]>;
 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
                                       "Enable XOP instructions",
-                                      [FeatureAVX, FeatureSSE4A]>;
+                                      [FeatureFMA4]>;
 def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
                           "HasVectorUAMem", "true",
                  "Allow unaligned memory operands on vector/SIMD instructions">;
@@ -226,7 +226,7 @@ def : Proc<"bdver1",        [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
 def : Proc<"bdver2",        [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                              FeatureAES, FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
-                             FeaturePOPCNT, FeatureBMI]>;
+                             FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
 def : Proc<"winchip-c6",    [FeatureMMX]>;
 def : Proc<"winchip2",      [Feature3DNow]>;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ea66a61..c77355f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5114,6 +5114,82 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
+// constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDNode *N = Op.getNode();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = Op.getNumOperands();
+
+  // Check supported types and sub-targets.
+  //
+  // Only v2f32 -> v2f64 needs special handling.
+  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  SDValue VecIn;
+  EVT VecInVT;
+  SmallVector<int, 8> Mask;
+  EVT SrcVT = MVT::Other;
+
+  // Check the patterns could be translated into X86vfpext.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opcode = In.getOpcode();
+
+    // Skip if the element is undefined.
+    if (Opcode == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    // Quit if one of the elements is not defined from 'fpext'.
+    if (Opcode != ISD::FP_EXTEND)
+      return SDValue();
+
+    // Check how the source of 'fpext' is defined.
+    SDValue L2In = In.getOperand(0);
+    EVT L2InVT = L2In.getValueType();
+
+    // Check the original type
+    if (SrcVT == MVT::Other)
+      SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
+      return SDValue();
+
+    // Check whether the value being 'fpext'ed is extracted from the same
+    // source.
+    Opcode = L2In.getOpcode();
+
+    // Quit if it's not extracted with a constant index.
+    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(L2In.getOperand(1)))
+      return SDValue();
+
+    SDValue ExtractedFromVec = L2In.getOperand(0);
+
+    if (VecIn.getNode() == 0) {
+      VecIn = ExtractedFromVec;
+      VecInVT = ExtractedFromVec.getValueType();
+    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+      return SDValue();
+
+    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+  }
+
+  // Fill the remaining mask as undef.
+  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+    Mask.push_back(-1);
+
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+                     DAG.getVectorShuffle(VecInVT, DL,
+                                          VecIn, DAG.getUNDEF(VecInVT),
+                                          &Mask[0]));
+}
+
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -5146,6 +5222,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (Broadcast.getNode())
     return Broadcast;
 
+  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+  if (FpExt.getNode())
+    return FpExt;
+
   unsigned EVTBits = ExtVT.getSizeInBits();
 
   unsigned NumZero  = 0;
@@ -11122,9 +11202,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
   Results.push_back(Swap.getValue(1));
 }
 
-void X86TargetLowering::
+static void
 ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
-                        SelectionDAG &DAG, unsigned NewOp) const {
+                        SelectionDAG &DAG, unsigned NewOp) {
   DebugLoc dl = Node->getDebugLoc();
   assert (Node->getValueType(0) == MVT::i64 &&
           "Only know how to expand i64 atomics");
@@ -11245,26 +11325,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::ATOMIC_LOAD_ADD:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_AND:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_NAND:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_OR:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_SUB:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_XOR:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
-    return;
-  case ISD::ATOMIC_SWAP:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+  case ISD::ATOMIC_SWAP: {
+    unsigned Opc;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unexpected opcode");
+    case ISD::ATOMIC_LOAD_ADD:
+      Opc = X86ISD::ATOMADD64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_AND:
+      Opc = X86ISD::ATOMAND64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_NAND:
+      Opc = X86ISD::ATOMNAND64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_OR:
+      Opc = X86ISD::ATOMOR64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_SUB:
+      Opc = X86ISD::ATOMSUB64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_XOR:
+      Opc = X86ISD::ATOMXOR64_DAG;
+      break;
+    case ISD::ATOMIC_SWAP:
+      Opc = X86ISD::ATOMSWAP64_DAG;
+      break;
+    }
+    ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
     return;
+  }
   case ISD::ATOMIC_LOAD:
     ReplaceATOMIC_LOAD(N, Results, DAG);
   }
@@ -11342,7 +11436,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ATOMAND64_DAG:      return "X86ISD::ATOMAND64_DAG";
   case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
+  case X86ISD::VSEXT_MOVL:         return "X86ISD::VSEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
@@ -12792,16 +12888,31 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   // String/text processing lowering.
   case X86::PCMPISTRM128REG:
   case X86::VPCMPISTRM128REG:
-    return EmitPCMP(MI, BB, 3, false /* in-mem */);
   case X86::PCMPISTRM128MEM:
   case X86::VPCMPISTRM128MEM:
-    return EmitPCMP(MI, BB, 3, true /* in-mem */);
   case X86::PCMPESTRM128REG:
   case X86::VPCMPESTRM128REG:
-    return EmitPCMP(MI, BB, 5, false /* in mem */);
   case X86::PCMPESTRM128MEM:
-  case X86::VPCMPESTRM128MEM:
-    return EmitPCMP(MI, BB, 5, true /* in mem */);
+  case X86::VPCMPESTRM128MEM: {
+    unsigned NumArgs;
+    bool MemArg;
+    switch (MI->getOpcode()) {
+    default: llvm_unreachable("illegal opcode!");
+    case X86::PCMPISTRM128REG:
+    case X86::VPCMPISTRM128REG:
+      NumArgs = 3; MemArg = false; break;
+    case X86::PCMPISTRM128MEM:
+    case X86::VPCMPISTRM128MEM:
+      NumArgs = 3; MemArg = true; break;
+    case X86::PCMPESTRM128REG:
+    case X86::VPCMPESTRM128REG:
+      NumArgs = 5; MemArg = false; break;
+    case X86::PCMPESTRM128MEM:
+    case X86::VPCMPESTRM128MEM:
+      NumArgs = 5; MemArg = true; break;
+    }
+    return EmitPCMP(MI, BB, NumArgs, MemArg);
+  }
 
   // Thread synchronization.
   case X86::MONITOR:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9123ebd..896d067 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -227,6 +227,9 @@ namespace llvm {
       // VSEXT_MOVL - Vector move low and sign extend.
      VSEXT_MOVL,
 
+      // VFPEXT - Vector FP extend.
+      VFPEXT,
+
      // VSHL, VSRL - 128-bit vector logical left / right shift
      VSHLDQ, VSRLDQ,
 
@@ -828,6 +831,8 @@ namespace llvm {
     SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
     SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
+
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,
@@ -859,9 +864,6 @@ namespace llvm {
                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                     LLVMContext &Context) const;
 
-    void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                 SelectionDAG &DAG, unsigned NewOp) const;
-
     /// Utility function to emit string processing sse4.2 instructions
     /// that return in xmm0.
     /// This takes the instruction to expand, the associated machine basic
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index d13167b..1db68c8 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -81,6 +81,11 @@ def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext  : SDNode<"X86ISD::VFPEXT",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisFP<0>, SDTCisFP<1>]>>;
+
 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;
 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e4c35b9..20dc81e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2101,12 +2101,20 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
 
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (VCVTPS2PDrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
             (VCVTPS2PDYrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
             (VCVTPS2PDYrm addr:$src)>;
 }
 
+let Predicates = [HasSSE2] in {
+  // Match fextend for 128 conversions
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (CVTPS2PDrr VR128:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Compare Instructions
 //===----------------------------------------------------------------------===//
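Usage note (not part of the diff above): LowerVectorFpExtend fires when type legalization has scalarized a v2f32-to-v2f64 fpext into a BUILD_VECTOR of per-element FP_EXTENDs of EXTRACT_VECTOR_ELTs; rebuilding that shape as X86ISD::VFPEXT lets the new X86vfpext patterns select a single cvtps2pd (vcvtps2pd under AVX). Below is a minimal LLVM IR sketch of the kind of input this should affect, assuming an x86-64 target with SSE2; the function and variable names are illustrative only, not a test case taken from this change.

; Hypothetical reproducer, e.g. llc -march=x86-64 -mattr=+sse2.
; Without the BUILD_VECTOR handling added above, the vector fpext is
; lowered through scalar conversions; with it, a single cvtps2pd is
; expected for the v2f32 -> v2f64 extend.
define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
entry:
  %0 = load <2 x float>* %in, align 8
  %1 = fpext <2 x float> %0 to <2 x double>
  store <2 x double> %1, <2 x double>* %out, align 16
  ret void
}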