Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 153
1 file changed, 132 insertions(+), 21 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ea66a61..c77355f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5114,6 +5114,82 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to
+// v2f64 and convert it into X86ISD::VFPEXT, since the current ISD::FP_EXTEND
+// requires the input and output vectors to have matching element counts.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDNode *N = Op.getNode();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = Op.getNumOperands();
+
+  // Check supported types and sub-targets.
+  //
+  // Only v2f32 -> v2f64 needs special handling.
+  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  SDValue VecIn;
+  EVT VecInVT;
+  SmallVector<int, 8> Mask;
+  EVT SrcVT = MVT::Other;
+
+  // Check whether the pattern can be translated into X86vfpext.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opcode = In.getOpcode();
+
+    // Skip if the element is undefined.
+    if (Opcode == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    // Quit if one of the elements is not defined from 'fpext'.
+    if (Opcode != ISD::FP_EXTEND)
+      return SDValue();
+
+    // Check how the source of 'fpext' is defined.
+    SDValue L2In = In.getOperand(0);
+    EVT L2InVT = L2In.getValueType();
+
+    // Check the original type.
+    if (SrcVT == MVT::Other)
+      SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if not homogeneously typed.
+      return SDValue();
+
+    // Check whether the value being 'fpext'ed is extracted from the same
+    // source.
+    Opcode = L2In.getOpcode();
+
+    // Quit if it's not extracted with a constant index.
+    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(L2In.getOperand(1)))
+      return SDValue();
+
+    SDValue ExtractedFromVec = L2In.getOperand(0);
+
+    if (VecIn.getNode() == 0) {
+      VecIn = ExtractedFromVec;
+      VecInVT = ExtractedFromVec.getValueType();
+    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+      return SDValue();
+
+    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+  }
+
+  // Fill the remaining mask with undef.
+  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+    Mask.push_back(-1);
+
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+                     DAG.getVectorShuffle(VecInVT, DL,
+                                          VecIn, DAG.getUNDEF(VecInVT),
+                                          &Mask[0]));
+}
+
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
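
For context, here is a minimal, hypothetical reproducer (not part of the patch; the function name and the GCC/Clang vector-extension syntax are assumptions) whose lowering produces exactly the shape LowerVectorFpExtend matches: a v2f64 BUILD_VECTOR whose elements are FP_EXTENDs of EXTRACT_VECTOR_ELTs from a single v2f32 source. With this change the backend can select one cvtps2pd instead of two scalar cvtss2sd conversions.

    // Hypothetical example; assumes an SSE2-capable x86 target and a
    // compiler supporting GCC-style vector extensions.
    typedef float  v2f32 __attribute__((vector_size(8)));
    typedef double v2f64 __attribute__((vector_size(16)));

    v2f64 widen(v2f32 v) {
      // Each lane is extracted and fpext'ed individually, then the results
      // are reassembled with BUILD_VECTOR -- the pattern recognized above.
      v2f64 r = { (double)v[0], (double)v[1] };
      return r;
    }
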
@@ -5146,6 +5222,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (Broadcast.getNode())
     return Broadcast;
 
+  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+  if (FpExt.getNode())
+    return FpExt;
+
   unsigned EVTBits = ExtVT.getSizeInBits();
 
   unsigned NumZero = 0;
@@ -11122,9 +11202,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
   Results.push_back(Swap.getValue(1));
 }
 
-void X86TargetLowering::
+static void
 ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
-                        SelectionDAG &DAG, unsigned NewOp) const {
+                        SelectionDAG &DAG, unsigned NewOp) {
   DebugLoc dl = Node->getDebugLoc();
   assert (Node->getValueType(0) == MVT::i64 &&
           "Only know how to expand i64 atomics");
@@ -11245,26 +11325,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::ATOMIC_LOAD_ADD:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_AND:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_NAND:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_OR:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_SUB:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_XOR:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
-    return;
-  case ISD::ATOMIC_SWAP:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+  case ISD::ATOMIC_SWAP: {
+    unsigned Opc;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unexpected opcode");
+    case ISD::ATOMIC_LOAD_ADD:
+      Opc = X86ISD::ATOMADD64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_AND:
+      Opc = X86ISD::ATOMAND64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_NAND:
+      Opc = X86ISD::ATOMNAND64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_OR:
+      Opc = X86ISD::ATOMOR64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_SUB:
+      Opc = X86ISD::ATOMSUB64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_XOR:
+      Opc = X86ISD::ATOMXOR64_DAG;
+      break;
+    case ISD::ATOMIC_SWAP:
+      Opc = X86ISD::ATOMSWAP64_DAG;
+      break;
+    }
+    ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
     return;
+  }
   case ISD::ATOMIC_LOAD:
     ReplaceATOMIC_LOAD(N, Results, DAG);
   }
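
The consolidation above leaves a single ReplaceATOMIC_BINARY_64 call site instead of seven. An equivalent way to structure it, sketched here with a hypothetical helper (not in the patch) that reuses the exact mapping the patch introduces, would keep the outer switch flat:

    // Hypothetical helper: map an ISD 64-bit atomic opcode to its X86 DAG
    // pseudo-opcode; the mapping mirrors the inner switch added above.
    static unsigned getAtomic64Opcode(unsigned ISDOpc) {
      switch (ISDOpc) {
      default:                    llvm_unreachable("Unexpected opcode");
      case ISD::ATOMIC_LOAD_ADD:  return X86ISD::ATOMADD64_DAG;
      case ISD::ATOMIC_LOAD_AND:  return X86ISD::ATOMAND64_DAG;
      case ISD::ATOMIC_LOAD_NAND: return X86ISD::ATOMNAND64_DAG;
      case ISD::ATOMIC_LOAD_OR:   return X86ISD::ATOMOR64_DAG;
      case ISD::ATOMIC_LOAD_SUB:  return X86ISD::ATOMSUB64_DAG;
      case ISD::ATOMIC_LOAD_XOR:  return X86ISD::ATOMXOR64_DAG;
      case ISD::ATOMIC_SWAP:      return X86ISD::ATOMSWAP64_DAG;
      }
    }
    // Usage: ReplaceATOMIC_BINARY_64(N, Results, DAG,
    //                                getAtomic64Opcode(N->getOpcode()));
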
@@ -11342,7 +11436,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ATOMAND64_DAG:      return "X86ISD::ATOMAND64_DAG";
   case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
+  case X86ISD::VSEXT_MOVL:         return "X86ISD::VSEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
@@ -12792,16 +12888,31 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   // String/text processing lowering.
   case X86::PCMPISTRM128REG:
   case X86::VPCMPISTRM128REG:
-    return EmitPCMP(MI, BB, 3, false /* in-mem */);
   case X86::PCMPISTRM128MEM:
   case X86::VPCMPISTRM128MEM:
-    return EmitPCMP(MI, BB, 3, true /* in-mem */);
   case X86::PCMPESTRM128REG:
   case X86::VPCMPESTRM128REG:
-    return EmitPCMP(MI, BB, 5, false /* in mem */);
   case X86::PCMPESTRM128MEM:
-  case X86::VPCMPESTRM128MEM:
-    return EmitPCMP(MI, BB, 5, true /* in mem */);
+  case X86::VPCMPESTRM128MEM: {
+    unsigned NumArgs;
+    bool MemArg;
+    switch (MI->getOpcode()) {
+    default: llvm_unreachable("illegal opcode!");
+    case X86::PCMPISTRM128REG:
+    case X86::VPCMPISTRM128REG:
+      NumArgs = 3; MemArg = false; break;
+    case X86::PCMPISTRM128MEM:
+    case X86::VPCMPISTRM128MEM:
+      NumArgs = 3; MemArg = true; break;
+    case X86::PCMPESTRM128REG:
+    case X86::VPCMPESTRM128REG:
+      NumArgs = 5; MemArg = false; break;
+    case X86::PCMPESTRM128MEM:
+    case X86::VPCMPESTRM128MEM:
+      NumArgs = 5; MemArg = true; break;
+    }
+    return EmitPCMP(MI, BB, NumArgs, MemArg);
+  }
 
   // Thread synchronization.
   case X86::MONITOR:
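
For readers unfamiliar with the SSE4.2 string instructions consolidated in the hunk above: the implicit-length forms (PCMPISTRM) operate on NUL-terminated data and take three operands, while the explicit-length forms (PCMPESTRM) additionally consume string lengths in EAX and EDX, hence NumArgs = 5. A short intrinsics illustration (hypothetical wrapper functions; assumes an SSE4.2-capable target):

    #include <nmmintrin.h> // SSE4.2 intrinsics

    // Implicit-length form: two data operands plus a control immediate,
    // matching NumArgs = 3 in EmitPCMP.
    __m128i cmp_implicit(__m128i a, __m128i b) {
      return _mm_cmpistrm(a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
    }

    // Explicit-length form: two extra length operands (passed in EAX/EDX
    // at the instruction level), matching NumArgs = 5 in EmitPCMP.
    __m128i cmp_explicit(__m128i a, int la, __m128i b, int lb) {
      return _mm_cmpestrm(a, la, b, lb, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
    }
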