Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp  466
1 file changed, 279 insertions, 187 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 98d8b85..0091df7 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -565,6 +565,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::CNEG: return "ARMISD::CNEG";
@@ -623,6 +624,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+ case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
@@ -2216,7 +2219,7 @@ static bool isFloatingPointZero(SDValue Op) {
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG,
+ SDValue &ARMcc, SelectionDAG &DAG,
DebugLoc dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
@@ -2268,48 +2271,14 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
CompareType = ARMISD::CMPZ;
break;
}
- ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
}
-static bool canBitcastToInt(SDNode *Op) {
- return Op->hasOneUse() &&
- ISD::isNormalLoad(Op) &&
- Op->getValueType(0) == MVT::f32;
-}
-
-static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
- if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
- return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
- Ld->getChain(), Ld->getBasePtr(),
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
-
- llvm_unreachable("Unknown VFP cmp argument!");
-}
-
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
-ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG,
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
DebugLoc dl) const {
- if (UnsafeFPMath && FiniteOnlyFPMath() &&
- (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
- CC == ISD::SETNE || CC == ISD::SETUNE) &&
- canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
- // If unsafe fp math optimization is enabled and there are no othter uses of
- // the CMP operands, and the condition code is EQ oe NE, we can optimize it
- // to an integer comparison.
- if (CC == ISD::SETOEQ)
- CC = ISD::SETEQ;
- else if (CC == ISD::SETUNE)
- CC = ISD::SETNE;
- LHS = bitcastToInt(LHS, DAG);
- RHS = bitcastToInt(RHS, DAG);
- return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
- }
-
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
@@ -2328,59 +2297,184 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (LHS.getValueType() == MVT::i32) {
- SDValue ARMCC;
+ SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMCC, CCR, Cmp);
+ ARMcc, CCR, Cmp);
if (CondCode2 != ARMCC::AL) {
- SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
Result = DAG.getNode(ARMISD::CMOV, dl, VT,
- Result, TrueVal, ARMCC2, CCR, Cmp2);
+ Result, TrueVal, ARMcc2, CCR, Cmp2);
}
return Result;
}
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(),
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
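bitcastf32Toi32 re-issues the f32 load as an i32 load so the comparison can stay in the integer pipeline. The property being exploited is that, apart from NaN and the +0.0/-0.0 pair (the cases the UnsafeFPMath and FiniteOnlyFPMath()/SeenZero checks in OptimizeVFPBrcond below are there to sidestep), two IEEE-754 singles compare equal exactly when their bit patterns are equal. A minimal standalone sketch of that idea outside the SelectionDAG machinery; the helper names are illustrative, not part of the patch:

#include <cstdint>
#include <cstring>

// View a float's storage as a 32-bit integer, the same reinterpretation the
// replacement i32 load performs on the in-memory value.
static uint32_t bitsOf(float F) {
  uint32_t I;
  std::memcpy(&I, &F, sizeof I);
  return I;
}

// Equality via integer bits: agrees with '==' for every value except NaN
// (never equal to itself) and -0.0 vs +0.0, which unsafe-fp-math lets us
// ignore.
static bool fpEqViaInt(float A, float B) {
  return bitsOf(A) == bitsOf(B);
}

Compiled for ARM, something like fpEqViaInt is roughly the two-load-plus-cmp shape this patch aims to produce for the f32 case.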
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, MVT::i32);
+ RetVal2 = DAG.getConstant(0, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ptr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ PtrType, Ptr, DAG.getConstant(4, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), NewPtr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
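expandf64Toi32 does the analogous split for f64: the original load is replayed as two i32 loads, one at the base pointer and one at base+4, with the second load's alignment clamped via MinAlign. A rough standalone equivalent of what those two words contain, assuming a little-endian target (the patch itself just reuses the load's memory operands):

#include <cstdint>
#include <cstring>

// Split a double into the two 32-bit words the pair of i32 loads would
// produce on a little-endian ARM target: low word at offset 0, high word at
// offset 4.
static void splitDouble(double D, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof Bits);
  Lo = static_cast<uint32_t>(Bits);
  Hi = static_cast<uint32_t>(Bits >> 32);
}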
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool SeenZero = false;
+ if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+ canChangeToInt(RHS, SeenZero, Subtarget) &&
+ // If one of the operands is zero, it's safe to ignore the NaN case.
+ (FiniteOnlyFPMath() || SeenZero)) {
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = bitcastf32Toi32(LHS, DAG);
+ RHS = bitcastf32Toi32(RHS, DAG);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ }
+
+ return SDValue();
+}
+
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
+ SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue Dest = Op.getOperand(4);
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
DebugLoc dl = Op.getDebugLoc();
if (LHS.getValueType() == MVT::i32) {
- SDValue ARMCC;
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
- Chain, Dest, ARMCC, CCR,Cmp);
+ Chain, Dest, ARMcc, CCR, Cmp);
}
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
if (CondCode2 != ARMCC::AL) {
- ARMCC = DAG.getConstant(CondCode2, MVT::i32);
- SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+ ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
}
return Res;
@@ -2469,12 +2563,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
- SDValue Cmp = getVFPCmp(Tmp1, FP0,
- ISD::SETLT, ARMCC, DAG, dl);
+ SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+ return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2553,51 +2646,18 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
}
/// getZeroVector - Returns a vector of specified type with all zero elements.
-///
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction. However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed. Regardless, use a canonical VMOV to create the
+/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
-
- // Zero vectors are used to represent vector negation and in those cases
- // will be implemented with the NEON VNEG instruction. However, VNEG does
- // not support i64 elements, so sometimes the zero vectors will need to be
- // explicitly constructed. For those cases, and potentially other uses in
- // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
- // to their dest type. This ensures they get CSE'd.
- SDValue Vec;
- SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
- SmallVector<SDValue, 8> Ops;
- MVT TVT;
-
- if (VT.getSizeInBits() == 64) {
- Ops.assign(8, Cst); TVT = MVT::v8i8;
- } else {
- Ops.assign(16, Cst); TVT = MVT::v16i8;
- }
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
-
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
-}
-
-/// getOnesVector - Returns a vector of specified type with all bits set.
-///
-static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
- assert(VT.isVector() && "Expected a vector type");
-
- // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
- // dest type. This ensures they get CSE'd.
- SDValue Vec;
- SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
- SmallVector<SDValue, 8> Ops;
- MVT TVT;
-
- if (VT.getSizeInBits() == 64) {
- Ops.assign(8, Cst); TVT = MVT::v8i8;
- } else {
- Ops.assign(16, Cst); TVT = MVT::v16i8;
- }
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
-
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+ // The canonical modified immediate encoding of a zero vector is....0!
+ SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+ EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
}
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
@@ -2611,7 +2671,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMCC;
+ SDValue ARMcc;
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -2627,9 +2687,9 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, dl);
+ ARMcc, DAG, dl);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
- SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
@@ -2647,7 +2707,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMCC;
+ SDValue ARMcc;
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -2661,9 +2721,9 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, dl);
+ ARMcc, DAG, dl);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
- SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
@@ -2850,13 +2910,11 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
-/// operand (e.g., VMOV). If so, return either the constant being
-/// splatted or the encoded value, depending on the DoEncode parameter.
+/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
- bool isVMOV, bool DoEncode) {
+ EVT &VT, bool is128Bits, bool isVMOV) {
unsigned OpCmode, Imm;
- EVT VT;
// SplatBitSize is set to the smallest size that splats the vector, so a
// zero vector will always have SplatBitSize == 8. However, NEON modified
@@ -2868,16 +2926,18 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
switch (SplatBitSize) {
case 8:
+ if (!isVMOV)
+ return SDValue();
// Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
OpCmode = 0xe;
Imm = SplatBits;
- VT = MVT::i8;
+ VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
break;
case 16:
// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
- VT = MVT::i16;
+ VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x00nn: Op=x, Cmode=100x.
OpCmode = 0x8;
@@ -2897,7 +2957,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
// * only one byte is nonzero, or
// * the least significant byte is 0xff and the second byte is nonzero, or
// * the least significant 2 bytes are 0xff and the third is nonzero.
- VT = MVT::i32;
+ VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x000000nn: Op=x, Cmode=000x.
OpCmode = 0;
@@ -2949,9 +3009,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
return SDValue();
case 64: {
- // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
if (!isVMOV)
return SDValue();
+ // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
uint64_t BitMask = 0xff;
uint64_t Val = 0;
unsigned ImmMask = 1;
@@ -2969,7 +3029,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
// Op=1, Cmode=1110.
OpCmode = 0x1e;
SplatBits = Val;
- VT = MVT::i64;
+ VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
break;
}
@@ -2978,32 +3038,8 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
return SDValue();
}
- if (DoEncode) {
- unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
- return DAG.getTargetConstant(EncodedVal, MVT::i32);
- }
- return DAG.getTargetConstant(SplatBits, VT);
-}
-
-/// getNEONModImm - If this is a valid vector constant for a NEON instruction
-/// with a "modified immediate" operand (e.g., VMOV) of the specified element
-/// size, return the encoded value for that immediate. The ByteSize field
-/// indicates the number of bytes of each element [1248].
-SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
- SelectionDAG &DAG) {
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
- HasAnyUndefs, ByteSize * 8))
- return SDValue();
-
- if (SplatBitSize > ByteSize * 8)
- return SDValue();
-
- return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
- SplatBitSize, DAG, isVMOV, true);
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
}
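The returned constant is the packed Op/Cmode plus 8-bit immediate, which is why the zero vector built in getZeroVector above (32-bit class 0x000000nn with nn = 0 and OpCmode = 0) encodes to plain 0. A small sketch of three classes visible in this hunk, assuming the (OpCmode << 8) | Imm packing of ARM_AM::createNEONModImm (an assumption; only the call is visible here):

#include <cassert>
#include <cstdint>

// Assumed layout of ARM_AM::createNEONModImm: Op/Cmode in bits [12:8],
// the 8-bit immediate in bits [7:0].
static unsigned createNEONModImm(unsigned OpCmode, unsigned Imm) {
  return (OpCmode << 8) | Imm;
}

// Three cases from the switch above, as a standalone sketch; the real routine
// also fixes the vector type and handles the remaining 16/32/64-bit classes.
static bool encodeSimpleModImm(uint64_t SplatBits, unsigned SplatBitSize,
                               unsigned &Encoded) {
  if (SplatBitSize == 8) {                                  // any byte: Op=0, Cmode=1110
    Encoded = createNEONModImm(0xe, SplatBits & 0xff);
    return true;
  }
  if (SplatBitSize == 16 && (SplatBits & ~0xffULL) == 0) {  // 0x00nn: Op=x, Cmode=100x
    Encoded = createNEONModImm(0x8, SplatBits & 0xff);
    return true;
  }
  if (SplatBitSize == 32 && (SplatBits & ~0xffULL) == 0) {  // 0x000000nn: Op=x, Cmode=000x
    Encoded = createNEONModImm(0x0, SplatBits & 0xff);
    return true;
  }
  return false;
}

int main() {
  unsigned Enc;
  assert(encodeSimpleModImm(0x2a, 8, Enc) && Enc == 0xe2a);
  assert(encodeSimpleModImm(0, 32, Enc) && Enc == 0);  // the canonical zero vector
  return 0;
}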
static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
@@ -3194,43 +3230,6 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-
-static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
- // Canonicalize all-zeros and all-ones vectors.
- ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
- if (ConstVal->isNullValue())
- return getZeroVector(VT, DAG, dl);
- if (ConstVal->isAllOnesValue())
- return getOnesVector(VT, DAG, dl);
-
- EVT CanonicalVT;
- if (VT.is64BitVector()) {
- switch (Val.getValueType().getSizeInBits()) {
- case 8: CanonicalVT = MVT::v8i8; break;
- case 16: CanonicalVT = MVT::v4i16; break;
- case 32: CanonicalVT = MVT::v2i32; break;
- case 64: CanonicalVT = MVT::v1i64; break;
- default: llvm_unreachable("unexpected splat element type"); break;
- }
- } else {
- assert(VT.is128BitVector() && "unknown splat vector size");
- switch (Val.getValueType().getSizeInBits()) {
- case 8: CanonicalVT = MVT::v16i8; break;
- case 16: CanonicalVT = MVT::v8i16; break;
- case 32: CanonicalVT = MVT::v4i32; break;
- case 64: CanonicalVT = MVT::v2i64; break;
- default: llvm_unreachable("unexpected splat element type"); break;
- }
- }
-
- // Build a canonical splat for this value.
- SmallVector<SDValue, 8> Ops;
- Ops.assign(CanonicalVT.getVectorNumElements(), Val);
- SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
- Ops.size());
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
-}
-
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
@@ -3244,11 +3243,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
// Check if an immediate VMOV works.
+ EVT VmovVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(),
- SplatBitSize, DAG, true, false);
- if (Val.getNode())
- return BuildSplat(Val, VT, DAG, dl);
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(), true);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+ }
+
+ // Try an immediate VMVN.
+ uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+ ((1LL << SplatBitSize) - 1));
+ Val = isNEONModifiedImm(NegatedImm,
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(), false);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+ }
}
}
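The VMVN probe complements the splat value within its own bit width (the NegatedImm computation above) and asks whether that complement is expressible as a modified immediate; if so, an immediate VMVN of the complement materializes the original splat. A self-contained illustration; the 0xffffffef value is an example chosen here, not taken from the patch:

#include <cassert>
#include <cstdint>

// Complement a splat value within its element width, as NegatedImm does
// (special-casing 64 bits here to avoid an oversized shift).
static uint64_t complementInWidth(uint64_t SplatBits, unsigned SplatBitSize) {
  uint64_t Mask = SplatBitSize == 64 ? ~0ULL : ((1ULL << SplatBitSize) - 1);
  return SplatBits ^ Mask;
}

int main() {
  // A v4i32 splat of 0xffffffef is not expressible as a VMOV modified
  // immediate, but its 32-bit complement 0x00000010 is, so the splat can be
  // built with an immediate VMVN instead of a constant-pool load.
  assert(complementInWidth(0xffffffefULL, 32) == 0x10);
  return 0;
}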
@@ -3825,6 +3838,15 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -3941,6 +3963,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
}
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+ .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
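The inserted sequence is a plain CMP on the first halves, a CMP predicated on EQ for the second halves, then a Bcc on EQ; for an NE condition the two successors are simply swapped. A semantic sketch in plain C++ (not MC-level) of when the conditional branch reaches destMBB:

#include <cstdint>

// Semantics of the CMP / predicated-CMP / Bcc sequence built for BCCi64:
// the predicated compare only executes when the first one left EQ set, so
// the final EQ flag means "both 32-bit halves are equal".
static bool bccI64TakesDest(uint32_t LHS1, uint32_t LHS2,
                            uint32_t RHS1, uint32_t RHS2, bool CondIsNE) {
  bool EQ = (LHS1 == RHS1);
  if (EQ)                        // predicated cmp: skipped once EQ is false
    EQ = (LHS2 == RHS2);
  return CondIsNE ? !EQ : EQ;    // NE swaps destMBB and exitMBB above
}

This is also why OptimizeVFPBrcond only forms BCC_i64 for equality-style conditions: the expansion has no way to express an ordered comparison.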
case ARM::tANDsp:
case ARM::tADDspr_:
case ARM::tSUBspi_:
@@ -4180,6 +4242,35 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
return SDValue();
}
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant.
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BIT_CONVERT)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+ return SDValue();
+
+ // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+ unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ // The canonical VMOV for a zero vector uses a 32-bit element size.
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned EltBits;
+ if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ EltSize = 8;
+ if (EltSize > VT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+ return DCI.CombineTo(N, Res, false);
+}
+
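The element-size guard matters because a VMOV/VMVN immediate splat is only uniform at its own element granularity. A self-contained illustration of the case the combine must reject (little-endian layout assumed in the comments, though the check holds either way):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A v4i16 VMOV splat of 0x00ff is ff 00 ff 00 ... as bytes (little-endian),
  // so a VDUPLANE of byte lane 0 would produce all-0xff -- a different vector.
  // Only when the VMOV element is no wider than the VDUPLANE element is the
  // dup redundant and replaceable by a bit_convert.
  uint16_t VmovSplat[4] = {0x00ff, 0x00ff, 0x00ff, 0x00ff};
  uint8_t Bytes[8];
  std::memcpy(Bytes, VmovSplat, sizeof Bytes);

  uint8_t Dup[8];
  for (int i = 0; i != 8; ++i)
    Dup[i] = Bytes[0];                    // VDUPLANE.8 of lane 0

  assert(std::memcmp(Dup, Bytes, sizeof Bytes) != 0);
  return 0;
}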
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
@@ -4558,6 +4649,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA: