Diffstat (limited to 'lib/Target')
47 files changed, 728 insertions, 495 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index f1e6a9f..fa64d6c 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -48,6 +48,8 @@ def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", + "FP compare + branch is slow">; // Some processors have multiply-accumulate instructions that don't // play nicely with other VFP instructions, and it's generally better @@ -129,7 +131,7 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, - FeatureNEONForFP, FeatureT2ExtractPack]>; + FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>; def : Processor<"cortex-a9", CortexA9Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>; def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index d316b13..92a13f1 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -519,9 +519,8 @@ namespace ARM_AM { // // This is stored in two operands [regaddr, align]. The first is the // address register. The second operand is the value of the alignment - // specifier to use or zero if no explicit alignment. - // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific - // instruction. + // specifier in bytes or zero if no explicit alignment. + // Valid alignments depend on the specific instruction. //===--------------------------------------------------------------------===// // NEON Modified Immediates diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 98d8b85..0091df7 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -565,6 +565,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; + case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::CNEG: return "ARMISD::CNEG"; @@ -623,6 +624,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; + case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; + case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VEXT: return "ARMISD::VEXT"; @@ -2216,7 +2219,7 @@ static bool isFloatingPointZero(SDValue Op) { /// the given operands. 
SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, + SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); @@ -2268,48 +2271,14 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, CompareType = ARMISD::CMPZ; break; } - ARMCC = DAG.getConstant(CondCode, MVT::i32); + ARMcc = DAG.getConstant(CondCode, MVT::i32); return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); } -static bool canBitcastToInt(SDNode *Op) { - return Op->hasOneUse() && - ISD::isNormalLoad(Op) && - Op->getValueType(0) == MVT::f32; -} - -static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) { - if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) - return DAG.getLoad(MVT::i32, Op.getDebugLoc(), - Ld->getChain(), Ld->getBasePtr(), - Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->isNonTemporal(), - Ld->getAlignment()); - - llvm_unreachable("Unknown VFP cmp argument!"); -} - /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue -ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, +ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, DebugLoc dl) const { - if (UnsafeFPMath && FiniteOnlyFPMath() && - (CC == ISD::SETEQ || CC == ISD::SETOEQ || - CC == ISD::SETNE || CC == ISD::SETUNE) && - canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) { - // If unsafe fp math optimization is enabled and there are no othter uses of - // the CMP operands, and the condition code is EQ oe NE, we can optimize it - // to an integer comparison. - if (CC == ISD::SETOEQ) - CC = ISD::SETEQ; - else if (CC == ISD::SETUNE) - CC = ISD::SETNE; - LHS = bitcastToInt(LHS, DAG); - RHS = bitcastToInt(RHS, DAG); - return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); - } - SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); @@ -2328,59 +2297,184 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (LHS.getValueType() == MVT::i32) { - SDValue ARMCC; + SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMCC, CCR, Cmp); + ARMcc, CCR, Cmp); if (CondCode2 != ARMCC::AL) { - SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); + SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. 
- SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl); + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); Result = DAG.getNode(ARMISD::CMOV, dl, VT, - Result, TrueVal, ARMCC2, CCR, Cmp2); + Result, TrueVal, ARMcc2, CCR, Cmp2); } return Result; } +/// canChangeToInt - Given the fp compare operand, return true if it is suitable +/// to morph to an integer compare sequence. +static bool canChangeToInt(SDValue Op, bool &SeenZero, + const ARMSubtarget *Subtarget) { + SDNode *N = Op.getNode(); + if (!N->hasOneUse()) + // Otherwise it requires moving the value from fp to integer registers. + return false; + if (!N->getNumValues()) + return false; + EVT VT = Op.getValueType(); + if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) + // f32 case is generally profitable. f64 case only makes sense when vcmpe + + // vmrs are very slow, e.g. cortex-a8. + return false; + + if (isFloatingPointZero(Op)) { + SeenZero = true; + return true; + } + return ISD::isNormalLoad(N); +} + +static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { + if (isFloatingPointZero(Op)) + return DAG.getConstant(0, MVT::i32); + + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) + return DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), Ld->getBasePtr(), + Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); + + llvm_unreachable("Unknown VFP cmp argument!"); +} + +static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, + SDValue &RetVal1, SDValue &RetVal2) { + if (isFloatingPointZero(Op)) { + RetVal1 = DAG.getConstant(0, MVT::i32); + RetVal2 = DAG.getConstant(0, MVT::i32); + return; + } + + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { + SDValue Ptr = Ld->getBasePtr(); + RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), Ptr, + Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); + + EVT PtrType = Ptr.getValueType(); + unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); + SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), + PtrType, Ptr, DAG.getConstant(4, PtrType)); + RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), NewPtr, + Ld->getSrcValue(), Ld->getSrcValueOffset() + 4, + Ld->isVolatile(), Ld->isNonTemporal(), + NewAlign); + return; + } + + llvm_unreachable("Unknown VFP cmp argument!"); +} + +/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some +/// f32 and even f64 comparisons to integer ones. +SDValue +ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + DebugLoc dl = Op.getDebugLoc(); + + bool SeenZero = false; + if (canChangeToInt(LHS, SeenZero, Subtarget) && + canChangeToInt(RHS, SeenZero, Subtarget) && + // If one of the operand is zero, it's safe to ignore the NaN case. + (FiniteOnlyFPMath() || SeenZero)) { + // If unsafe fp math optimization is enabled and there are no othter uses of + // the CMP operands, and the condition code is EQ oe NE, we can optimize it + // to an integer comparison. 
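[Editor's sketch, not part of the patch] A minimal C++ illustration of the source-level equivalence this lowering relies on, assuming finite-only/unsafe FP math so NaNs (and the +0.0 vs -0.0 distinction) can be ignored; the helper names are illustrative only:

#include <cstdint>
#include <cstring>

// f32 case: compare the raw 32-bit bit patterns (what bitcastf32Toi32 sets up).
static bool feq32(float a, float b) {
  uint32_t ia, ib;
  std::memcpy(&ia, &a, sizeof ia);
  std::memcpy(&ib, &b, sizeof ib);
  return ia == ib;
}

// f64 case: compare the two 32-bit halves (what expandf64Toi32 plus the
// BCC_i64 pseudo implement).
static bool feq64(double a, double b) {
  uint64_t ia, ib;
  std::memcpy(&ia, &a, sizeof ia);
  std::memcpy(&ib, &b, sizeof ib);
  return uint32_t(ia) == uint32_t(ib) && uint32_t(ia >> 32) == uint32_t(ib >> 32);
}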
+ if (CC == ISD::SETOEQ) + CC = ISD::SETEQ; + else if (CC == ISD::SETUNE) + CC = ISD::SETNE; + + SDValue ARMcc; + if (LHS.getValueType() == MVT::f32) { + LHS = bitcastf32Toi32(LHS, DAG); + RHS = bitcastf32Toi32(RHS, DAG); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, + Chain, Dest, ARMcc, CCR, Cmp); + } + + SDValue LHS1, LHS2; + SDValue RHS1, RHS2; + expandf64Toi32(LHS, DAG, LHS1, LHS2); + expandf64Toi32(RHS, DAG, RHS1, RHS2); + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); + ARMcc = DAG.getConstant(CondCode, MVT::i32); + SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + } + + return SDValue(); +} + SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); + SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue Dest = Op.getOperand(4); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); DebugLoc dl = Op.getDebugLoc(); if (LHS.getValueType() == MVT::i32) { - SDValue ARMCC; + SDValue ARMcc; + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, - Chain, Dest, ARMCC, CCR,Cmp); + Chain, Dest, ARMcc, CCR, Cmp); } assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); + + if (UnsafeFPMath && + (CC == ISD::SETEQ || CC == ISD::SETOEQ || + CC == ISD::SETNE || CC == ISD::SETUNE)) { + SDValue Result = OptimizeVFPBrcond(Op, DAG); + if (Result.getNode()) + return Result; + } + ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); + SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; + SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); if (CondCode2 != ARMCC::AL) { - ARMCC = DAG.getConstant(CondCode2, MVT::i32); - SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; + ARMcc = DAG.getConstant(CondCode2, MVT::i32); + SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); } return Res; @@ -2469,12 +2563,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32); SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); - SDValue Cmp = getVFPCmp(Tmp1, FP0, - ISD::SETLT, ARMCC, DAG, dl); + SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); + return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp); } SDValue 
ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ @@ -2553,51 +2646,18 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { } /// getZeroVector - Returns a vector of specified type with all zero elements. -/// +/// Zero vectors are used to represent vector negation and in those cases +/// will be implemented with the NEON VNEG instruction. However, VNEG does +/// not support i64 elements, so sometimes the zero vectors will need to be +/// explicitly constructed. Regardless, use a canonical VMOV to create the +/// zero vector. static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - - // Zero vectors are used to represent vector negation and in those cases - // will be implemented with the NEON VNEG instruction. However, VNEG does - // not support i64 elements, so sometimes the zero vectors will need to be - // explicitly constructed. For those cases, and potentially other uses in - // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted - // to their dest type. This ensures they get CSE'd. - SDValue Vec; - SDValue Cst = DAG.getTargetConstant(0, MVT::i8); - SmallVector<SDValue, 8> Ops; - MVT TVT; - - if (VT.getSizeInBits() == 64) { - Ops.assign(8, Cst); TVT = MVT::v8i8; - } else { - Ops.assign(16, Cst); TVT = MVT::v16i8; - } - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); - - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); -} - -/// getOnesVector - Returns a vector of specified type with all bits set. -/// -static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { - assert(VT.isVector() && "Expected a vector type"); - - // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their - // dest type. This ensures they get CSE'd. - SDValue Vec; - SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8); - SmallVector<SDValue, 8> Ops; - MVT TVT; - - if (VT.getSizeInBits() == 64) { - Ops.assign(8, Cst); TVT = MVT::v8i8; - } else { - Ops.assign(16, Cst); TVT = MVT::v16i8; - } - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); - - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); + // The canonical modified immediate encoding of a zero vector is....0! + SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); + EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two @@ -2611,7 +2671,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue ARMCC; + SDValue ARMcc; unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? 
ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); @@ -2627,9 +2687,9 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, - ARMCC, DAG, dl); + ARMcc, DAG, dl); SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, + SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; @@ -2647,7 +2707,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue ARMCC; + SDValue ARMcc; assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, @@ -2661,9 +2721,9 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, - ARMCC, DAG, dl); + ARMcc, DAG, dl); SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC, + SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; @@ -2850,13 +2910,11 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" -/// operand (e.g., VMOV). If so, return either the constant being -/// splatted or the encoded value, depending on the DoEncode parameter. +/// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, - bool isVMOV, bool DoEncode) { + EVT &VT, bool is128Bits, bool isVMOV) { unsigned OpCmode, Imm; - EVT VT; // SplatBitSize is set to the smallest size that splats the vector, so a // zero vector will always have SplatBitSize == 8. However, NEON modified @@ -2868,16 +2926,18 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, switch (SplatBitSize) { case 8: + if (!isVMOV) + return SDValue(); // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); OpCmode = 0xe; Imm = SplatBits; - VT = MVT::i8; + VT = is128Bits ? MVT::v16i8 : MVT::v8i8; break; case 16: // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. - VT = MVT::i16; + VT = is128Bits ? MVT::v8i16 : MVT::v4i16; if ((SplatBits & ~0xff) == 0) { // Value = 0x00nn: Op=x, Cmode=100x. OpCmode = 0x8; @@ -2897,7 +2957,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // * only one byte is nonzero, or // * the least significant byte is 0xff and the second byte is nonzero, or // * the least significant 2 bytes are 0xff and the third is nonzero. - VT = MVT::i32; + VT = is128Bits ? MVT::v4i32 : MVT::v2i32; if ((SplatBits & ~0xff) == 0) { // Value = 0x000000nn: Op=x, Cmode=000x. OpCmode = 0; @@ -2949,9 +3009,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return SDValue(); case 64: { - // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
if (!isVMOV) return SDValue(); + // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. uint64_t BitMask = 0xff; uint64_t Val = 0; unsigned ImmMask = 1; @@ -2969,7 +3029,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // Op=1, Cmode=1110. OpCmode = 0x1e; SplatBits = Val; - VT = MVT::i64; + VT = is128Bits ? MVT::v2i64 : MVT::v1i64; break; } @@ -2978,32 +3038,8 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return SDValue(); } - if (DoEncode) { - unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); - return DAG.getTargetConstant(EncodedVal, MVT::i32); - } - return DAG.getTargetConstant(SplatBits, VT); -} - -/// getNEONModImm - If this is a valid vector constant for a NEON instruction -/// with a "modified immediate" operand (e.g., VMOV) of the specified element -/// size, return the encoded value for that immediate. The ByteSize field -/// indicates the number of bytes of each element [1248]. -SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, - SelectionDAG &DAG) { - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ByteSize * 8)) - return SDValue(); - - if (SplatBitSize > ByteSize * 8) - return SDValue(); - - return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), - SplatBitSize, DAG, isVMOV, true); + unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); + return DAG.getTargetConstant(EncodedVal, MVT::i32); } static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, @@ -3194,43 +3230,6 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } - -static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { - // Canonicalize all-zeros and all-ones vectors. - ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode()); - if (ConstVal->isNullValue()) - return getZeroVector(VT, DAG, dl); - if (ConstVal->isAllOnesValue()) - return getOnesVector(VT, DAG, dl); - - EVT CanonicalVT; - if (VT.is64BitVector()) { - switch (Val.getValueType().getSizeInBits()) { - case 8: CanonicalVT = MVT::v8i8; break; - case 16: CanonicalVT = MVT::v4i16; break; - case 32: CanonicalVT = MVT::v2i32; break; - case 64: CanonicalVT = MVT::v1i64; break; - default: llvm_unreachable("unexpected splat element type"); break; - } - } else { - assert(VT.is128BitVector() && "unknown splat vector size"); - switch (Val.getValueType().getSizeInBits()) { - case 8: CanonicalVT = MVT::v16i8; break; - case 16: CanonicalVT = MVT::v8i16; break; - case 32: CanonicalVT = MVT::v4i32; break; - case 64: CanonicalVT = MVT::v2i64; break; - default: llvm_unreachable("unexpected splat element type"); break; - } - } - - // Build a canonical splat for this value. - SmallVector<SDValue, 8> Ops; - Ops.assign(CanonicalVT.getVectorNumElements(), Val); - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], - Ops.size()); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res); -} - // If this is a case we can't handle, return null and let the default // expansion code take care of it. static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { @@ -3244,11 +3243,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { // Check if an immediate VMOV works. 
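[Editor's sketch, not part of the patch] For reference, a small C++ rendering of the 32-bit "modified immediate" forms listed in isNEONModifiedImm above, together with the complement trick that the BUILD_VECTOR lowering below uses to fall back to VMVN when the splat value itself is not encodable; names are illustrative:

#include <cstdint>

// Splat values a VMOV.i32 modified immediate can represent.
static bool isVMOVi32ModImm(uint32_t V) {
  return (V & ~0x000000ffu) == 0 ||            // 0x000000nn
         (V & ~0x0000ff00u) == 0 ||            // 0x0000nn00
         (V & ~0x00ff0000u) == 0 ||            // 0x00nn0000
         (V & ~0xff000000u) == 0 ||            // 0xnn000000
         (V & 0xffff00ffu) == 0x000000ffu ||   // 0x0000nnff
         (V & 0xff00ffffu) == 0x0000ffffu;     // 0x00nnffff
}

// Try VMOV first; otherwise try VMVN of the bitwise complement, e.g. a splat
// of 0xffffff00 can be materialized as a VMVN.i32 of #0xff.
static bool selectSplat(uint32_t Splat, bool &UseVMVN) {
  if (isVMOVi32ModImm(Splat))  { UseVMVN = false; return true; }
  if (isVMOVi32ModImm(~Splat)) { UseVMVN = true;  return true; }
  return false;
}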
+ EVT VmovVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), - SplatBitSize, DAG, true, false); - if (Val.getNode()) - return BuildSplat(Val, VT, DAG, dl); + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VmovVT, VT.is128BitVector(), true); + if (Val.getNode()) { + SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + } + + // Try an immediate VMVN. + uint64_t NegatedImm = (SplatBits.getZExtValue() ^ + ((1LL << SplatBitSize) - 1)); + Val = isNEONModifiedImm(NegatedImm, + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VmovVT, VT.is128BitVector(), false); + if (Val.getNode()) { + SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + } } } @@ -3825,6 +3838,15 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +static +MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I != Succ) + return *I; + llvm_unreachable("Expecting a BB with two successors!"); +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -3941,6 +3963,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return BB; } + case ARM::BCCi64: + case ARM::BCCZi64: { + // Compare both parts that make up the double comparison separately for + // equality. + bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; + + unsigned LHS1 = MI->getOperand(1).getReg(); + unsigned LHS2 = MI->getOperand(2).getReg(); + if (RHSisZero) { + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(LHS1).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(LHS2).addImm(0) + .addImm(ARMCC::EQ).addReg(ARM::CPSR); + } else { + unsigned RHS1 = MI->getOperand(3).getReg(); + unsigned RHS2 = MI->getOperand(4).getReg(); + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(LHS1).addReg(RHS1)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(LHS2).addReg(RHS2) + .addImm(ARMCC::EQ).addReg(ARM::CPSR); + } + + MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); + MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); + if (MI->getOperand(0).getImm() == ARMCC::NE) + std::swap(destMBB, exitMBB); + + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) + .addMBB(exitMBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + } + case ARM::tANDsp: case ARM::tADDspr_: case ARM::tSUBspi_: @@ -4180,6 +4242,35 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, return SDValue(); } +/// PerformVDUPLANECombine - Target-specific dag combine xforms for +/// ARMISD::VDUPLANE. +static SDValue PerformVDUPLANECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is + // redundant. + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Ignore bit_converts. 
+ while (Op.getOpcode() == ISD::BIT_CONVERT) + Op = Op.getOperand(0); + if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) + return SDValue(); + + // Make sure the VMOV element size is not bigger than the VDUPLANE elements. + unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); + // The canonical VMOV for a zero vector uses a 32-bit element size. + unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned EltBits; + if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) + EltSize = 8; + if (EltSize > VT.getVectorElementType().getSizeInBits()) + return SDValue(); + + SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); + return DCI.CombineTo(N, Res, false); +} + /// getVShiftImm - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. @@ -4558,6 +4649,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 3a38669..128b72e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -53,6 +53,8 @@ namespace llvm { CMOV, // ARM conditional move instructions. CNEG, // ARM conditional negate instructions. + BCC_i64, + RBIT, // ARM bitreverse instruction FTOSI, // FP to sint within a FP register. @@ -122,6 +124,10 @@ namespace llvm { VGETLANEu, // zero-extend vector extract element VGETLANEs, // sign-extend vector extract element + // Vector move immediate and move negated immediate: + VMOVIMM, + VMVNIMM, + // Vector duplicate: VDUP, VDUPLANE, @@ -150,13 +156,6 @@ namespace llvm { /// Define some predicates that are used for node matching. namespace ARM { - /// getNEONModImm - If this is a valid vector constant for a NEON - /// instruction with a "modified immediate" operand (e.g., VMOV) of the - /// specified element size, return the encoded value for that immediate. - /// The ByteSize field indicates the number of bytes of each element [1248]. - SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, - SelectionDAG &DAG); - /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd) /// instruction, returns its 8-bit integer representation. 
Otherwise, @@ -363,9 +362,11 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; - SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; + SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; + SDValue getVFPCmp(SDValue LHS, SDValue RHS, + SelectionDAG &DAG, DebugLoc dl) const; + + SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c73e204..51fc152 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -38,6 +38,12 @@ def SDT_ARMBr2JT : SDTypeProfile<0, 4, [SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDT_ARMBCC_i64 : SDTypeProfile<0, 6, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, SDTCisVT<2, i32>, + SDTCisVT<3, i32>, SDTCisVT<4, i32>, + SDTCisVT<5, OtherVT>]>; + def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, @@ -90,6 +96,9 @@ def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, [SDNPHasChain]>; +def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, + [SDNPHasChain]>; + def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; @@ -1685,13 +1694,19 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), } // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +// The assume-no-carry-in form uses the negation of the input since add/sub +// assume opposite meanings of the carry flag (i.e., carry == !borrow). +// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory +// details. def : ARMPat<(add GPR:$src, so_imm_neg:$imm), (SUBri GPR:$src, so_imm_neg:$imm)>; - -//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), -// (SUBSri GPR:$src, so_imm_neg:$imm)>; -//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm), -// (SBCri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), + (SUBSri GPR:$src, so_imm_neg:$imm)>; +// The with-carry-in form matches bitwise not instead of the negation. +// Effectively, the inverse interpretation of the carry flag already accounts +// for part of the negation. +def : ARMPat<(adde GPR:$src, so_imm_not:$imm), + (SBCri GPR:$src, so_imm_not:$imm)>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -2279,6 +2294,22 @@ defm CMNz : AI1_cmp_irs<0b1011, "cmn", def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), (CMNzri GPR:$src, so_imm_neg:$imm)>; +// Pseudo i64 compares for some floating point compares. 
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, + Defs = [CPSR] in { +def BCCi64 : PseudoInst<(outs), + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), + IIC_Br, + "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc", + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>; + +def BCCZi64 : PseudoInst<(outs), + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), + IIC_Br, + "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc", + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>; +} // usesCustomInserter + // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index a84315f..7f7eb98 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -65,6 +65,10 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; +def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; +def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; + def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -94,6 +98,20 @@ def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; +def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ + ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); + unsigned EltBits; + uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + return (EltBits == 32 && EltVal == 0); +}]>; + +def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ + ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); + unsigned EltBits; + uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + return (EltBits == 8 && EltVal == 0xff); +}]>; + //===----------------------------------------------------------------------===// // NEON operand definitions //===----------------------------------------------------------------------===// @@ -2318,10 +2336,10 @@ defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, // Vector Bitwise Operations. 
-def vnot8 : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>; -def vnot16 : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>; +def vnotd : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>; +def vnotq : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>; // VAND : Vector Bitwise AND @@ -2347,36 +2365,58 @@ def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, "vbic", "$dst, $src1, $src2", "", [(set DPR:$dst, (v2i32 (and DPR:$src1, - (vnot8 DPR:$src2))))]>; + (vnotd DPR:$src2))))]>; def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, "vbic", "$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (and QPR:$src1, - (vnot16 QPR:$src2))))]>; + (vnotq QPR:$src2))))]>; // VORN : Vector Bitwise OR NOT def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, "vorn", "$dst, $src1, $src2", "", [(set DPR:$dst, (v2i32 (or DPR:$src1, - (vnot8 DPR:$src2))))]>; + (vnotd DPR:$src2))))]>; def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, "vorn", "$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (or QPR:$src1, - (vnot16 QPR:$src2))))]>; + (vnotq QPR:$src2))))]>; + +// VMVN : Vector Bitwise NOT (Immediate) + +let isReMaterializable = 1 in { +def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i16", "$dst, $SIMM", "", + [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>; +def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i16", "$dst, $SIMM", "", + [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>; + +def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i32", "$dst, $SIMM", "", + [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>; +def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i32", "$dst, $SIMM", "", + [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>; +} // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", - [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>; + [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", - [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>; -def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>; -def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>; + [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>; +def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; +def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), @@ -2385,14 +2425,14 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), "vbsl", "$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (v2i32 (or (and DPR:$src2, DPR:$src1), - (and DPR:$src3, (vnot8 DPR:$src1)))))]>; + (and DPR:$src3, (vnotd DPR:$src1)))))]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, IIC_VCNTiQ, "vbsl", "$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (v4i32 (or (and 
QPR:$src2, QPR:$src1), - (and QPR:$src3, (vnot16 QPR:$src1)))))]>; + (and QPR:$src3, (vnotq QPR:$src1)))))]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -2726,20 +2766,19 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, // Vector Negate. -def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; -def vneg8 : PatFrag<(ops node:$in), - (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>; -def vneg16 : PatFrag<(ops node:$in), - (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>; +def vnegd : PatFrag<(ops node:$in), + (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>; +def vnegq : PatFrag<(ops node:$in), + (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>; class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>; + [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>; class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>; + [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>; // VNEG : Vector Negate (integer) def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; @@ -2759,12 +2798,12 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, "vneg", "f32", "$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; -def : Pat<(v8i8 (vneg8 DPR:$src)), (VNEGs8d DPR:$src)>; -def : Pat<(v4i16 (vneg8 DPR:$src)), (VNEGs16d DPR:$src)>; -def : Pat<(v2i32 (vneg8 DPR:$src)), (VNEGs32d DPR:$src)>; -def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>; -def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>; -def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>; +def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>; +def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>; +def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>; +def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>; +def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>; +def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>; // VQNEG : Vector Saturating Negate defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, @@ -2818,74 +2857,42 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), // VMOV : Vector Move (Immediate) -// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. -def VMOV_get_imm8 : SDNodeXForm<build_vector, [{ - return ARM::getNEONModImm(N, 1, true, *CurDAG); -}]>; -def vmovImm8 : PatLeaf<(build_vector), [{ - return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0; -}], VMOV_get_imm8>; - -// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm. -def VMOV_get_imm16 : SDNodeXForm<build_vector, [{ - return ARM::getNEONModImm(N, 2, true, *CurDAG); -}]>; -def vmovImm16 : PatLeaf<(build_vector), [{ - return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0; -}], VMOV_get_imm16>; - -// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm. -def VMOV_get_imm32 : SDNodeXForm<build_vector, [{ - return ARM::getNEONModImm(N, 4, true, *CurDAG); -}]>; -def vmovImm32 : PatLeaf<(build_vector), [{ - return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0; -}], VMOV_get_imm32>; - -// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm. 
-def VMOV_get_imm64 : SDNodeXForm<build_vector, [{ - return ARM::getNEONModImm(N, 8, true, *CurDAG); -}]>; -def vmovImm64 : PatLeaf<(build_vector), [{ - return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0; -}], VMOV_get_imm64>; - let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", - [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; + [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", - [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; + [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>; def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", - [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; + [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>; def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", - [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; + [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", - [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>; +def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", - [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; + [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", - [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; + [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", - [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; + [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>; } // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 4692f2a..bbe675e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -122,6 +122,10 @@ def imm0_255_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 255; }], imm_neg_XFORM>; +def imm0_255_not : PatLeaf<(i32 imm), [{ + return (uint32_t)(~N->getZExtValue()) < 255; +}], imm_comp_XFORM>; + // Define Thumb2 specific addressing modes. // t2addrmode_imm12 := reg + imm12 @@ -1391,13 +1395,32 @@ defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +// The assume-no-carry-in form uses the negation of the input since add/sub +// assume opposite meanings of the carry flag (i.e., carry == !borrow). +// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory +// details. +// The AddedComplexity preferences the first variant over the others since +// it can be shrunk to a 16-bit wide encoding, while the others cannot. 
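// Editorial note (not part of the patch): the two's-complement identities the
// patterns below rely on are
//   x + (-k)     ==  x - k                  (addc of -k  ->  SUBS x, k)
//   x + (~k) + c ==  x - k - (1 - c)        (adde of ~k  ->  SBC  x, k)
// ARM's carry flag is the inverse of borrow, so SBC's "- (1 - c)" supplies
// exactly the missing "+1" of the negation, which is why the with-carry-in
// form matches the bitwise NOT of the immediate rather than its negation.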
+let AddedComplexity = 1 in +def : T2Pat<(add GPR:$src, imm0_255_neg:$imm), + (t2SUBri GPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), + (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), + (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +let AddedComplexity = 1 in +def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm), + (t2SUBSri GPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm), + (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>; +// The with-carry-in form matches bitwise not instead of the negation. +// Effectively, the inverse interpretation of the carry flag already accounts +// for part of the negation. let AddedComplexity = 1 in -def : T2Pat<(add GPR:$src, imm0_255_neg:$imm), - (t2SUBri GPR:$src, imm0_255_neg:$imm)>; -def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), - (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; -def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), - (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +def : T2Pat<(adde GPR:$src, imm0_255_not:$imm), + (t2SBCSri GPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm), + (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -2435,7 +2458,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_Br, - "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", + "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8332bba..e7d92ed 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -54,6 +54,9 @@ protected: /// the VML[AS] instructions are slow (if so, don't use them). bool SlowVMLx; + /// SlowFPBrcc - True if floating point compare + branch is slow. + bool SlowFPBrcc; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -133,6 +136,7 @@ protected: bool hasDivide() const { return HasHardwareDivide; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } + bool isFPBrccSlow() const { return SlowFPBrcc; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 8415d1a..4b08324 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -88,7 +88,7 @@ private: /// its register number, or -1 if there is no match. To allow return values /// to be used directly in register lists, arm registers have values between /// 0 and 15. 
- int MatchRegisterName(const StringRef &Name); + int MatchRegisterName(StringRef Name); /// } @@ -97,7 +97,7 @@ public: ARMAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -517,7 +517,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St, const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return true; - const StringRef &ShiftName = Tok.getString(); + StringRef ShiftName = Tok.getString(); if (ShiftName == "lsl" || ShiftName == "LSL") St = Lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") @@ -549,7 +549,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St, } /// A hack to allow some testing, to be replaced by a real table gen version. -int ARMAsmParser::MatchRegisterName(const StringRef &Name) { +int ARMAsmParser::MatchRegisterName(StringRef Name) { if (Name == "r0" || Name == "R0") return 0; else if (Name == "r1" || Name == "R1") @@ -593,7 +593,7 @@ MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst) { ARMOperand &Op0 = *(ARMOperand*)Operands[0]; assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); - const StringRef &Mnemonic = Op0.getToken(); + StringRef Mnemonic = Op0.getToken(); if (Mnemonic == "add" || Mnemonic == "stmfd" || Mnemonic == "str" || @@ -658,7 +658,7 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { } /// Parse an arm instruction mnemonic followed by its operands. -bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc, +bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { OwningPtr<ARMOperand> Op; ARMOperand::CreateToken(Op, Name, NameLoc); @@ -761,7 +761,7 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return Error(L, "unexpected token in .syntax directive"); - const StringRef &Mode = Tok.getString(); + StringRef Mode = Tok.getString(); if (Mode == "unified" || Mode == "UNIFIED") Parser.Lex(); else if (Mode == "divided" || Mode == "DIVIDED") diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 6a40cf3..946f474 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -602,12 +602,8 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op, O << "[" << getRegisterName(MO1.getReg()); if (MO2.getImm()) { - unsigned Align = MO2.getImm(); - assert((Align == 8 || Align == 16 || Align == 32) && - "unexpected NEON load/store alignment"); - Align <<= 3; // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << Align; + O << ", :" << (MO2.getImm() << 3); } O << "]"; } diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 170819a..edc9345 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -442,7 +442,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, O << "[" << getRegisterName(MO1.getReg()); if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. 
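// Editorial note (not part of the patch): the addrmode6 operand now records
// the alignment in bytes (see the ARMAddressingModes.h comment earlier in
// this patch), while the assembly syntax expects the value in bits after the
// colon, hence the "<< 3" below: an 8-byte-aligned access prints as ", :64",
// a 16-byte-aligned one as ", :128".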
- O << ", :" << MO2.getImm(); + O << ", :" << (MO2.getImm() << 3); } O << "]"; } diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 85d5ca0..0cb8ff0 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -590,3 +590,70 @@ than the Z bit, we'll need additional logic to reverse the conditionals associated with the comparison. Perhaps a pseudo-instruction for the comparison, with a post-codegen pass to clean up and handle the condition codes? See PR5694 for testcase. + +//===---------------------------------------------------------------------===// + +Given the following on armv5: +int test1(int A, int B) { + return (A&-8388481)|(B&8388480); +} + +We currently generate: + ldr r2, .LCPI0_0 + and r0, r0, r2 + ldr r2, .LCPI0_1 + and r1, r1, r2 + orr r0, r1, r0 + bx lr + +We should be able to replace the second ldr+and with a bic (i.e. reuse the +constant which was already loaded). Not sure what's necessary to do that. + +//===---------------------------------------------------------------------===// + +Given the following on ARMv7: +int test1(int A, int B) { + return (A&-8388481)|(B&8388480); +} + +We currently generate: + bfc r0, #7, #16 + movw r2, #:lower16:8388480 + movt r2, #:upper16:8388480 + and r1, r1, r2 + orr r0, r1, r0 + bx lr + +The following is much shorter: + lsr r1, r1, #7 + bfi r0, r1, #7, #16 + bx lr + + +//===---------------------------------------------------------------------===// + +The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal: + +int a(int x) { return __builtin_bswap32(x); } + +a: + mov r1, #255, 24 + mov r2, #255, 16 + and r1, r1, r0, lsr #8 + and r2, r2, r0, lsl #8 + orr r1, r1, r0, lsr #24 + orr r0, r2, r0, lsl #24 + orr r0, r0, r1 + bx lr + +Something like the following would be better (fewer instructions/registers): + eor r1, r0, r0, ror #16 + bic r1, r1, #0xff0000 + mov r1, r1, lsr #8 + eor r0, r1, r0, ror #8 + bx lr + +A custom Thumb version would also be a slight improvement over the generic +version. 
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/AlphaMCAsmInfo.cpp index c67c6a2..a35e884 100644 --- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp +++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "AlphaMCAsmInfo.h" using namespace llvm; -AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) { +AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) { AlignmentIsInBytes = false; PrivateGlobalPrefix = "$"; GPRel32Directive = ".gprel32"; diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/AlphaMCAsmInfo.h index c27065d..837844b 100644 --- a/lib/Target/Alpha/AlphaMCAsmInfo.h +++ b/lib/Target/Alpha/AlphaMCAsmInfo.h @@ -14,14 +14,14 @@ #ifndef ALPHATARGETASMINFO_H #define ALPHATARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; struct AlphaMCAsmInfo : public MCAsmInfo { - explicit AlphaMCAsmInfo(const Target &T, const StringRef &TT); + explicit AlphaMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp index 31470fb..5b9d4a2 100644 --- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp +++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp @@ -15,7 +15,7 @@ using namespace llvm; -BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) { +BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) { GlobalPrefix = "_"; CommentString = "//"; HasSetDirective = false; diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/BlackfinMCAsmInfo.h index 0efc295..c372aa2 100644 --- a/lib/Target/Blackfin/BlackfinMCAsmInfo.h +++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.h @@ -14,14 +14,14 @@ #ifndef BLACKFINTARGETASMINFO_H #define BLACKFINTARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; struct BlackfinMCAsmInfo : public MCAsmInfo { - explicit BlackfinMCAsmInfo(const Target &T, const StringRef &TT); + explicit BlackfinMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 68445cf..25ba88a 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "SPUMCAsmInfo.h" using namespace llvm; -SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { +SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/SPUMCAsmInfo.h index 8d75ea8..7f850d3 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.h +++ b/lib/Target/CellSPU/SPUMCAsmInfo.h @@ -14,14 +14,14 @@ #ifndef SPUTARGETASMINFO_H #define SPUTARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; struct SPULinuxMCAsmInfo : public MCAsmInfo { - explicit SPULinuxMCAsmInfo(const Target &T, const StringRef &TT); + explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp index 7ae465d..4abeb2e 100644 --- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp @@ -14,7 +14,7 
@@ #include "MBlazeMCAsmInfo.h" using namespace llvm; -MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) { +MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, StringRef TT) { AlignmentIsInBytes = false; Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h index bccb418..9d6ff3a 100644 --- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h @@ -14,15 +14,15 @@ #ifndef MBLAZETARGETASMINFO_H #define MBLAZETARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; class MBlazeMCAsmInfo : public MCAsmInfo { public: - explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT); + explicit MBlazeMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 8f97d25..cc350e8 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -1621,8 +1621,7 @@ const char* MSILWriter::getLibraryName(const GlobalVariable* GV) { } -const char* MSILWriter::getLibraryForSymbol(const StringRef &Name, - bool isFunction, +const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction, CallingConv::ID CallingConv) { // TODO: Read *.def file with function and libraries definitions. return "MSVCRT.DLL"; diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h index a95ae23..92a3abe 100644 --- a/lib/Target/MSIL/MSILWriter.h +++ b/lib/Target/MSIL/MSILWriter.h @@ -246,7 +246,7 @@ namespace llvm { const char* getLibraryName(const GlobalVariable* GV); - const char* getLibraryForSymbol(const StringRef &Name, bool isFunction, + const char* getLibraryForSymbol(StringRef Name, bool isFunction, CallingConv::ID CallingConv); void printExternals(); diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp index cfb499d..3f44944 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "MSP430MCAsmInfo.h" using namespace llvm; -MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { +MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) { PrivateGlobalPrefix = ".L"; WeakRefDirective ="\t.weak\t"; PCSymbol="."; diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MSP430MCAsmInfo.h index 8318029..f3138a2 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MSP430MCAsmInfo.h @@ -14,13 +14,14 @@ #ifndef MSP430TARGETASMINFO_H #define MSP430TARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; + struct MSP430MCAsmInfo : public MCAsmInfo { - explicit MSP430MCAsmInfo(const Target &T, const StringRef &TT); + explicit MSP430MCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp index 89e3e11..fe48ab7 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MipsMCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "MipsMCAsmInfo.h" using namespace llvm; -MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) { +MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { AlignmentIsInBytes = false; Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h 
index 33a4b5e..15a867e 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MipsMCAsmInfo.h @@ -14,15 +14,15 @@ #ifndef MIPSTARGETASMINFO_H #define MIPSTARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; class MipsMCAsmInfo : public MCAsmInfo { public: - explicit MipsMCAsmInfo(const Target &T, const StringRef &TT); + explicit MipsMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 6a4d0d6..7a948de 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -416,7 +416,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num, if (!TagName.empty()) Tmp += ", " + TagName; for (int i = 0; i<Num; i++) - Tmp += "," + utostr(Aux[i] && 0xff); + Tmp += "," + utostr(Aux[i] & 0xff); OS.EmitRawText("\n\t.dim " + Twine(VarName) + ", 1" + Tmp); } diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp index b080542..1bcc497 100644 --- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp +++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp @@ -20,7 +20,7 @@ #include "PIC16ISelLowering.h" using namespace llvm; -PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) { +PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, StringRef TT) { CommentString = ";"; GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL); GlobalDirective = "\tglobal\t"; diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.h b/lib/Target/PIC16/PIC16MCAsmInfo.h index e84db85..6e1c111 100644 --- a/lib/Target/PIC16/PIC16MCAsmInfo.h +++ b/lib/Target/PIC16/PIC16MCAsmInfo.h @@ -25,7 +25,7 @@ namespace llvm { const char *RomData16bitsDirective; const char *RomData32bitsDirective; public: - PIC16MCAsmInfo(const Target &T, const StringRef &TT); + PIC16MCAsmInfo(const Target &T, StringRef TT); virtual const char *getDataASDirective(unsigned size, unsigned AS) const; }; diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/SparcMCAsmInfo.cpp index 535c6f7..d37d6d2 100644 --- a/lib/Target/Sparc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp @@ -12,10 +12,9 @@ //===----------------------------------------------------------------------===// #include "SparcMCAsmInfo.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; -SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) { +SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = 0; // .xword is only supported by V9. 
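The PIC16DbgInfo::EmitAuxEntry change above is a genuine bug fix: "Aux[i] && 0xff" is a logical AND, which collapses every nonzero value to 1 before it is printed, while the intended "Aux[i] & 0xff" masks out the low byte. A minimal standalone illustration of the operator difference (not PIC16 code; the value is made up):

    #include <iostream>

    int main() {
      int Aux = 0x1234;
      std::cout << (Aux && 0xff) << "\n"; // logical AND: prints 1 for any nonzero Aux
      std::cout << (Aux & 0xff)  << "\n"; // bitwise AND: prints 52, i.e. the low byte 0x34
      return 0;
    }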
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/SparcMCAsmInfo.h index 12d6ef4..0cb6827 100644 --- a/lib/Target/Sparc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/SparcMCAsmInfo.h @@ -14,13 +14,14 @@ #ifndef SPARCTARGETASMINFO_H #define SPARCTARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; + struct SparcELFMCAsmInfo : public MCAsmInfo { - explicit SparcELFMCAsmInfo(const Target &T, const StringRef &TT); + explicit SparcELFMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp index f9ccc47..4f7f70b 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp @@ -16,7 +16,7 @@ #include "llvm/MC/MCSectionELF.h" using namespace llvm; -SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) { +SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; PCSymbol = "."; diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h index 87908f2..a6a27e2 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.h +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h @@ -21,7 +21,7 @@ namespace llvm { class StringRef; struct SystemZMCAsmInfo : public MCAsmInfo { - explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT); + explicit SystemZMCAsmInfo(const Target &T, StringRef TT); virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index df52368..47c91df 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -294,7 +294,7 @@ namespace llvm { /// option is specified on the command line. If this returns false (default), /// the code generator is not allowed to assume that FP arithmetic arguments /// and results are never NaNs or +-Infs. - bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; } + bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; } /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume /// that the rounding mode of the FPU can change from its default. diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index a856e9c..f1e66ab 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -65,7 +65,7 @@ public: X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -602,7 +602,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { } bool X86ATTAsmParser:: -ParseInstruction(const StringRef &Name, SMLoc NameLoc, +ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The various flavors of pushf and popf use Requires<In32BitMode> and // Requires<In64BitMode>, but the assembler doesn't yet implement that. 
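A recurring change in this patch is passing StringRef by value instead of by const reference (AlphaMCAsmInfo, BlackfinMCAsmInfo, SPULinuxMCAsmInfo, MipsMCAsmInfo, SparcELFMCAsmInfo, SystemZMCAsmInfo, and so on). A StringRef is only a pointer plus a length, so copying it is as cheap as copying a reference and saves the callee an indirection. A rough standalone sketch of the idea, using a stand-in type rather than the real llvm::StringRef:

    #include <cstddef>
    #include <cstring>
    #include <iostream>

    // Stand-in for llvm::StringRef: a non-owning pointer/length pair.
    struct FakeStringRef {
      const char *Data;
      std::size_t Length;
      FakeStringRef(const char *S) : Data(S), Length(std::strlen(S)) {}
    };

    // Passing the two-word struct by value is as cheap as passing a reference,
    // which is why the constructors in this patch now take "StringRef TT" directly.
    static void useTriple(FakeStringRef TT) {
      std::cout << TT.Length << " characters in the target triple\n";
    }

    int main() {
      useTriple("x86_64-unknown-linux-gnu"); // hypothetical target triple
      return 0;
    }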
@@ -612,6 +612,8 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); else if (Name == "pushfl") return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); + else if (Name == "pusha") + return Error(NameLoc, "pusha cannot be encoded in 64-bit mode"); } else { if (Name == "popfq") return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 73bc603..08e6486 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -17,7 +17,6 @@ #include "X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" -#include "X86COFF.h" #include "X86COFFMachineModuleInfo.h" #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" @@ -35,6 +34,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" @@ -60,8 +60,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (Subtarget->isTargetCOFF()) { bool Intrn = MF.getFunction()->hasInternalLinkage(); OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT); - OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC + : COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); OutStreamer.EndCOFFSymbolDef(); } @@ -582,8 +584,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { E = COFFMMI.externals_end(); I != E; ++I) { OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT); - OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); OutStreamer.EndCOFFSymbolDef(); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 09f150b..e67fc06 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -154,15 +154,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); - // Subtract the pic base. - Expr - = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(GetPICBaseSymbol(), - Ctx), - Ctx); - - break; + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + // Subtract the pic base. 
+ Expr = MCBinaryExpr::CreateSub(Expr, + MCSymbolRefExpr::Create(GetPICBaseSymbol(), + Ctx), + Ctx); + break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h deleted file mode 100644 index 0a8e4e6..0000000 --- a/lib/Target/X86/X86COFF.h +++ /dev/null @@ -1,95 +0,0 @@ -//===--- X86COFF.h - Some definitions from COFF documentations ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file just defines some symbols found in COFF documentation. They are -// used to emit function type information for COFF targets (Cygwin/Mingw32). -// -//===----------------------------------------------------------------------===// - -#ifndef X86COFF_H -#define X86COFF_H - -namespace COFF -{ -/// Storage class tells where and what the symbol represents -enum StorageClass { - C_EFCN = -1, ///< Physical end of function - C_NULL = 0, ///< No symbol - C_AUTO = 1, ///< External definition - C_EXT = 2, ///< External symbol - C_STAT = 3, ///< Static - C_REG = 4, ///< Register variable - C_EXTDEF = 5, ///< External definition - C_LABEL = 6, ///< Label - C_ULABEL = 7, ///< Undefined label - C_MOS = 8, ///< Member of structure - C_ARG = 9, ///< Function argument - C_STRTAG = 10, ///< Structure tag - C_MOU = 11, ///< Member of union - C_UNTAG = 12, ///< Union tag - C_TPDEF = 13, ///< Type definition - C_USTATIC = 14, ///< Undefined static - C_ENTAG = 15, ///< Enumeration tag - C_MOE = 16, ///< Member of enumeration - C_REGPARM = 17, ///< Register parameter - C_FIELD = 18, ///< Bit field - - C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block - C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function - C_EOS = 102, ///< End of structure - C_FILE = 103, ///< File name - C_LINE = 104, ///< Line number, reformatted as symbol - C_ALIAS = 105, ///< Duplicate tag - C_HIDDEN = 106 ///< External symbol in dmert public lib -}; - -/// The type of the symbol. This is made up of a base type and a derived type. -/// For example, pointer to int is "pointer to T" and "int" -enum SymbolType { - T_NULL = 0, ///< No type info - T_ARG = 1, ///< Void function argument (only used by compiler) - T_VOID = 1, ///< The same as above. Just named differently in some specs. 
- T_CHAR = 2, ///< Character - T_SHORT = 3, ///< Short integer - T_INT = 4, ///< Integer - T_LONG = 5, ///< Long integer - T_FLOAT = 6, ///< Floating point - T_DOUBLE = 7, ///< Double word - T_STRUCT = 8, ///< Structure - T_UNION = 9, ///< Union - T_ENUM = 10, ///< Enumeration - T_MOE = 11, ///< Member of enumeration - T_UCHAR = 12, ///< Unsigned character - T_USHORT = 13, ///< Unsigned short - T_UINT = 14, ///< Unsigned integer - T_ULONG = 15 ///< Unsigned long -}; - -/// Derived type of symbol -enum SymbolDerivedType { - DT_NON = 0, ///< No derived type - DT_PTR = 1, ///< Pointer to T - DT_FCN = 2, ///< Function returning T - DT_ARY = 3 ///< Array of T -}; - -/// Masks for extracting parts of type -enum SymbolTypeMasks { - N_BTMASK = 017, ///< Mask for base type - N_TMASK = 060 ///< Mask for derived type -}; - -/// Offsets of parts of type -enum Shifts { - N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT) -}; - -} - -#endif // X86COFF_H diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index cdde24a..ce13707 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -540,7 +540,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { StubAM.GVOpFlags = GVFlags; // Prepare for inserting code in the local-value area. - MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + SavePoint SaveInsertPt = enterLocalValueArea(); if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; @@ -1279,12 +1279,11 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { return false; // First issue a copy to GR16_ABCD or GR32_ABCD. - unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr; const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; unsigned CopyReg = createResultReg(CopyRC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg) - .addReg(InputReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CopyReg).addReg(InputReg); // Then issue an extract_subreg. unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1a63474..b3c4886 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2458,17 +2458,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If the tailcall address may be in a register, then make sure it's // possible to register allocate for it. In 32-bit, the call address can // only target EAX, EDX, or ECX since the tail call must be scheduled after - // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI, - // RDI, R8, R9, R11. - if (!isa<GlobalAddressSDNode>(Callee) && + // callee-saved registers are restored. These happen to be the same + // registers used to pass 'inreg' arguments so watch out for those. + if (!Subtarget->is64Bit() && + !isa<GlobalAddressSDNode>(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { - unsigned Limit = Subtarget->is64Bit() ? 
8 : 3; unsigned NumInRegs = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - if (VA.isRegLoc()) { - if (++NumInRegs == Limit) + if (!VA.isRegLoc()) + continue; + unsigned Reg = VA.getLocReg(); + switch (Reg) { + default: break; + case X86::EAX: case X86::EDX: case X86::ECX: + if (++NumInRegs == 3) return false; + break; } } } @@ -7993,7 +7999,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, unsigned immOpc, unsigned LoadOpc, unsigned CXchgOpc, - unsigned copyOpc, unsigned notOpc, unsigned EAXreg, TargetRegisterClass *RC, @@ -8070,7 +8075,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(tt); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); @@ -8081,7 +8086,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); MIB.addReg(EAXreg); // insert branch @@ -8117,7 +8122,6 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, const TargetRegisterClass *RC = X86::GR32RegisterClass; const unsigned LoadOpc = X86::MOV32rm; - const unsigned copyOpc = X86::MOV32rr; const unsigned NotOpc = X86::NOT32r; const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); @@ -8227,14 +8231,14 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); MIB.addReg(t2); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); MIB.addReg(t5); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); MIB.addReg(t6); MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); @@ -8245,9 +8249,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3); MIB.addReg(X86::EAX); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4); MIB.addReg(X86::EDX); // insert branch @@ -8326,12 +8330,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2); else MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, 
TII->get(X86::CMP32rr)); @@ -8353,7 +8357,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, (*MIB).setMemRefs(mInstr->memoperands_begin(), mInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); MIB.addReg(X86::EAX); // insert branch @@ -8735,25 +8739,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, X86::OR32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMXOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, X86::XOR32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMNAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass, true); case X86::ATOMMIN32: @@ -8768,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, X86::OR16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMXOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, X86::XOR16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMNAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass, true); case X86::ATOMMIN16: @@ -8801,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMOR8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, X86::OR8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMXOR8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, X86::XOR8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMNAND8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass, true); // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. 
@@ -8827,25 +8831,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMOR64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, X86::OR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMXOR64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, X86::XOR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMNAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass, true); case X86::ATOMMIN64: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2d28e5c..4e4daa4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -764,7 +764,6 @@ namespace llvm { unsigned immOpc, unsigned loadOpc, unsigned cxchgOpc, - unsigned copyOpc, unsigned notOpc, unsigned EAXreg, TargetRegisterClass *RC, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 97578af..cc3fdf1 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -106,6 +106,7 @@ class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } +class VEX_L { bit hasVEX_L = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> @@ -138,6 +139,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV field? bit hasVEX_i8ImmReg = 0; // Does this inst requires the last source register // to be encoded in a immediate field? + bit hasVEX_L = 0; // Does this inst uses large (256-bit) registers? // TSFlags layout should be kept in sync with X86InstrInfo.h. let TSFlags{5-0} = FormBits; @@ -155,6 +157,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{33} = hasVEX_WPrefix; let TSFlags{34} = hasVEX_4VPrefix; let TSFlags{35} = hasVEX_i8ImmReg; + let TSFlags{36} = hasVEX_L; } class I<bits<8> o, Format f, dag outs, dag ins, string asm, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f762b58..ad0217a 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -453,7 +453,13 @@ namespace X86II { // VEX_I8IMM - Specifies that the last register used in a AVX instruction, // must be encoded in the i8 immediate field. This usually happens in // instructions with 4 operands. - VEX_I8IMM = 1ULL << 35 + VEX_I8IMM = 1ULL << 35, + + // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current + // instruction uses 256-bit wide registers. This is usually auto detected if + // a VR256 register is used, but some AVX instructions also have this field + // marked when using a f256 memory references. 
+ VEX_L = 1ULL << 36 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ab0005b..ebe161b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -666,6 +666,9 @@ defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB, VEX; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB, VEX; } let Pattern = []<dag> in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, @@ -806,9 +809,13 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), // Convert packed single/double fp to doubleword let isAsmParserOnly = 1 in { def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; @@ -862,6 +869,10 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; @@ -912,14 +923,39 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>; +let isAsmParserOnly = 1 in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. 
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; +} + // Convert packed single to packed double -let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX, - Requires<[HasAVX]>; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX, - Requires<[HasAVX]>; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; @@ -949,10 +985,25 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), // Convert packed double to packed single let isAsmParserOnly = 1 in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. 
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; -// FIXME: the memory form of this instruction should described using -// use extra asm syntax + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; } def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; @@ -1142,6 +1193,16 @@ let isAsmParserOnly = 1 in { "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", SSEPackedDouble>, OpSize, VEX_4V; + let Pattern = []<dag> in { + defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; + defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + } } let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, @@ -2935,19 +2996,46 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // SSE3 - Conversion Instructions //===---------------------------------------------------------------------===// +// Convert Packed Double FP to Packed DW Integers let isAsmParserOnly = 1, Predicates = [HasAVX] in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. 
def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; } def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; + +// Convert Packed DW Integers to Packed Double FP +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +} + def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 633ddd4..23b0666 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -432,6 +432,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (TSFlags & X86II::VEX_W) VEX_W = 1; + if (TSFlags & X86II::VEX_L) + VEX_L = 1; + switch (TSFlags & X86II::Op0Mask) { default: assert(0 && "Invalid prefix!"); case X86II::T8: // 0F 38 diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/XCoreMCAsmInfo.cpp index 5f6feae..42ab1b3 100644 --- a/lib/Target/XCore/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp @@ -10,7 +10,7 @@ #include "XCoreMCAsmInfo.h" using namespace llvm; -XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) { +XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) { SupportsDebugInformation = true; Data16bitsDirective = "\t.short\t"; Data32bitsDirective = "\t.long\t"; diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/XCoreMCAsmInfo.h index 01f8e48..8403922 100644 --- a/lib/Target/XCore/XCoreMCAsmInfo.h +++ b/lib/Target/XCore/XCoreMCAsmInfo.h @@ -14,14 +14,15 @@ #ifndef XCORETARGETASMINFO_H #define XCORETARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; + class XCoreMCAsmInfo : public 
MCAsmInfo { public: - explicit XCoreMCAsmInfo(const Target &T, const StringRef &TT); + explicit XCoreMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm
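Tying together the VEX_L additions earlier in the patch (the hasVEX_L bit in X86InstrFormats.td, the VEX_L = 1ULL << 36 flag in X86InstrInfo.h, and the new test in X86MCCodeEmitter.cpp): each prefix property occupies one bit of the instruction's 64-bit TSFlags word, so the emitter only needs a mask test to decide whether to set VEX.L for 256-bit forms such as vcvtpd2psy. A small standalone sketch of that bit-test pattern; only the two flag values visible in the diff are taken from the patch, the rest is illustrative:

    #include <cstdint>
    #include <iostream>

    // Flag values as they appear in the X86II enum in this patch.
    const std::uint64_t VEX_I8IMM = 1ULL << 35;
    const std::uint64_t VEX_L     = 1ULL << 36;

    int main() {
      // Pretend TSFlags for a 256-bit instruction such as VCVTPD2PSYrm.
      std::uint64_t TSFlags = VEX_L;

      unsigned VEX_Lbit = 0;
      if (TSFlags & VEX_L)                 // same test EmitVEXOpcodePrefix performs
        VEX_Lbit = 1;

      std::cout << "VEX.L = " << VEX_Lbit << "\n"; // prints VEX.L = 1
      return 0;
    }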