diff options
Diffstat (limited to 'lib/Target/ARM/ARMISelDAGToDAG.cpp')
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 445 |
1 files changed, 349 insertions, 96 deletions
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index efd6d2b..2c51de2 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -16,24 +16,25 @@ #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -78,6 +79,8 @@ public: return "ARM Instruction Selection"; } + virtual void PreprocessISelDAG(); + /// getI32Imm - Return a target constant of type i32 with the specified /// value. inline SDValue getI32Imm(unsigned Imm) { @@ -255,6 +258,8 @@ private: // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); + SDNode *SelectInlineAsm(SDNode *N); + SDNode *SelectConcatVector(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); @@ -265,15 +270,16 @@ private: char ConstraintCode, std::vector<SDValue> &OutOps); - // Form pairs of consecutive S, D, or Q registers. - SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1); - SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); - SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); + // Form pairs of consecutive R, S, D, or Q registers. + SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); // Form sequences of 4 consecutive S, D, or Q registers. - SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); // Get the alignment operand for a NEON VLD or VST instruction. SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector); @@ -326,6 +332,87 @@ static bool isScaledConstantInRange(SDValue Node, int Scale, return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; } +void ARMDAGToDAGISel::PreprocessISelDAG() { + if (!Subtarget->hasV6T2Ops()) + return; + + bool isThumb2 = Subtarget->isThumb(); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (N->getOpcode() != ISD::ADD) + continue; + + // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with + // leading zeros, followed by consecutive set bits, followed by 1 or 2 + // trailing zeros, e.g. 1020. + // Transform the expression to + // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number + // of trailing zeros of c2. The left shift would be folded as an shifter + // operand of 'add' and the 'and' and 'srl' would become a bits extraction + // node (UBFX). + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned And_imm = 0; + if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { + if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) + std::swap(N0, N1); + } + if (!And_imm) + continue; + + // Check if the AND mask is an immediate of the form: 000.....1111111100 + unsigned TZ = CountTrailingZeros_32(And_imm); + if (TZ != 1 && TZ != 2) + // Be conservative here. Shifter operands aren't always free. e.g. On + // Swift, left shifter operand of 1 / 2 for free but others are not. + // e.g. + // ubfx r3, r1, #16, #8 + // ldr.w r3, [r0, r3, lsl #2] + // vs. + // mov.w r9, #1020 + // and.w r2, r9, r1, lsr #14 + // ldr r2, [r0, r2] + continue; + And_imm >>= TZ; + if (And_imm & (And_imm + 1)) + continue; + + // Look for (and (srl X, c1), c2). + SDValue Srl = N1.getOperand(0); + unsigned Srl_imm = 0; + if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || + (Srl_imm <= 2)) + continue; + + // Make sure first operand is not a shifter operand which would prevent + // folding of the left shift. + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (isThumb2) { + if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1)) + continue; + } else { + if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || + SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) + continue; + } + + // Now make the transformation. + Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32, + Srl.getOperand(0), + CurDAG->getConstant(Srl_imm+TZ, MVT::i32)); + N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32, + Srl, CurDAG->getConstant(And_imm, MVT::i32)); + N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32, + N1, CurDAG->getConstant(TZ, MVT::i32)); + CurDAG->UpdateNodeOperands(N, N0, N1); + } +} + /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at /// least on current ARM implementations) which should be avoidded. @@ -1444,9 +1531,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } -/// PairSRegs - Form a D register from a pair of S registers. -/// -SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form a GPRPair pseudo register from a pair of GPR regs. +SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = + CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); +} + +/// \brief Form a D register from a pair of S registers. +SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32); @@ -1456,9 +1553,8 @@ SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// PairDRegs - Form a quad register from a pair of D registers. -/// -SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form a quad register from a pair of D registers. +SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); @@ -1467,9 +1563,8 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. -/// -SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form 4 consecutive D registers from a pair of Q registers. +SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); @@ -1478,9 +1573,8 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// QuadSRegs - Form 4 consecutive S registers. -/// -SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive S registers. +SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = @@ -1494,9 +1588,8 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } -/// QuadDRegs - Form 4 consecutive D registers. -/// -SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive D registers. +SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); @@ -1509,9 +1602,8 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } -/// QuadQRegs - Form 4 consecutive Q registers. -/// -SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive Q registers. +SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32); @@ -1784,7 +1876,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V0 = N->getOperand(Vec0Idx + 0); SDValue V1 = N->getOperand(Vec0Idx + 1); if (NumVecs == 2) - SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); else { SDValue V2 = N->getOperand(Vec0Idx + 2); // If it's a vst3, form a quad D-register and leave the last part as @@ -1792,13 +1884,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) : N->getOperand(Vec0Idx + 3); - SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); } } else { // Form a QQ register. SDValue Q0 = N->getOperand(Vec0Idx); SDValue Q1 = N->getOperand(Vec0Idx + 1); - SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); + SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); } unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : @@ -1840,7 +1932,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(Vec0Idx + 3); - SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); // Store the even D registers. This is always an updating store, so that it // provides the address to the second store for the odd subregs. @@ -1950,18 +2042,18 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, SDValue V1 = N->getOperand(Vec0Idx + 1); if (NumVecs == 2) { if (is64BitVector) - SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); else - SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); + SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); } else { SDValue V2 = N->getOperand(Vec0Idx + 2); SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(Vec0Idx + 3); if (is64BitVector) - SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); else - SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); } Ops.push_back(SuperReg); Ops.push_back(getI32Imm(Lane)); @@ -2087,7 +2179,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDValue V0 = N->getOperand(FirstTblReg + 0); SDValue V1 = N->getOperand(FirstTblReg + 1); if (NumVecs == 2) - RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); else { SDValue V2 = N->getOperand(FirstTblReg + 2); // If it's a vtbl3, form a quad D-register and leave the last part as @@ -2095,7 +2187,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(FirstTblReg + 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); } SmallVector<SDValue, 6> Ops; @@ -2113,10 +2205,10 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, if (!Subtarget->hasV6T2Ops()) return NULL; - unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + unsigned Opc = isSigned + ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); - // For unsigned extracts, check for a shift right and mask unsigned And_imm = 0; if (N->getOpcode() == ISD::AND) { @@ -2134,7 +2226,29 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, // Note: The width operand is encoded as width-1. unsigned Width = CountTrailingOnes_32(And_imm) - 1; unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { + // It's cheaper to use a right shift to extract the top bits. + if (Subtarget->isThumb()) { + Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + + // ARM models shift instructions as MOVsi with shifter operand. + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); + SDValue ShOpc = + CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), + MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, + getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5); + } + SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), @@ -2411,7 +2525,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { EVT VT = N->getValueType(0); if (!VT.is128BitVector() || N->getNumOperands() != 2) llvm_unreachable("unexpected CONCAT_VECTORS"); - return PairDRegs(VT, N->getOperand(0), N->getOperand(1)); + return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { @@ -2441,6 +2555,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case ISD::XOR: { // Select special operations if XOR node forms integer ABS pattern SDNode *ResNode = SelectABSOp(N); @@ -2790,13 +2910,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned NumElts = VecVT.getVectorNumElements(); if (EltVT == MVT::f64) { assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); - return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1)); + return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); } assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); if (NumElts == 2) - return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1)); + return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); - return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1), + return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); } @@ -3009,17 +3129,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Chain = N->getOperand(0); - unsigned NewOpc = ARM::LDREXD; - if (Subtarget->isThumb() && Subtarget->hasThumb2()) - NewOpc = ARM::t2LDREXD; + bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); + unsigned NewOpc = isThumb ? ARM::t2LDREXD :ARM::LDREXD; // arm_ldrexd returns a i64 value in {i32, i32} std::vector<EVT> ResTys; - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); + if (isThumb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + } else + ResTys.push_back(MVT::Untyped); ResTys.push_back(MVT::Other); - // place arguments in the right order + // Place arguments in the right order. SmallVector<SDValue, 7> Ops; Ops.push_back(MemAddr); Ops.push_back(getAL(CurDAG)); @@ -3032,30 +3154,33 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); - // Until there's support for specifing explicit register constraints - // like the use of even/odd register pair, hardcode ldrexd to always - // use the pair [R0, R1] to hold the load result. - Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0, - SDValue(Ld, 0), SDValue(0,0)); - Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1, - SDValue(Ld, 1), Chain.getValue(1)); - // Remap uses. - SDValue Glue = Chain.getValue(1); + SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); if (!SDValue(N, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R0, MVT::i32, Glue); - Glue = Result.getValue(2); + SDValue Result; + if (isThumb) + Result = SDValue(Ld, 0); + else { + SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); + SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); + Result = SDValue(ResNode,0); + } ReplaceUses(SDValue(N, 0), Result); } if (!SDValue(N, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R1, MVT::i32, Glue); - Glue = Result.getValue(2); + SDValue Result; + if (isThumb) + Result = SDValue(Ld, 1); + else { + SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); + SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); + Result = SDValue(ResNode,0); + } ReplaceUses(SDValue(N, 1), Result); } - - ReplaceUses(SDValue(N, 2), SDValue(Ld, 2)); + ReplaceUses(SDValue(N, 2), OutChain); return NULL; } @@ -3066,38 +3191,25 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Val1 = N->getOperand(3); SDValue MemAddr = N->getOperand(4); - // Until there's support for specifing explicit register constraints - // like the use of even/odd register pair, hardcode strexd to always - // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored. - Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0, - SDValue(0, 0)); - Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1)); - - SDValue Glue = Chain.getValue(1); - Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R2, MVT::i32, Glue); - Glue = Val0.getValue(1); - Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R3, MVT::i32, Glue); - // Store exclusive double return a i32 value which is the return status // of the issued store. - std::vector<EVT> ResTys; - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::Other); + EVT ResTys[] = { MVT::i32, MVT::Other }; - // place arguments in the right order + bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); + // Place arguments in the right order. SmallVector<SDValue, 7> Ops; - Ops.push_back(Val0); - Ops.push_back(Val1); + if (isThumb) { + Ops.push_back(Val0); + Ops.push_back(Val1); + } else + // arm_strexd uses GPRPair. + Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); Ops.push_back(MemAddr); Ops.push_back(getAL(CurDAG)); Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(Chain); - unsigned NewOpc = ARM::STREXD; - if (Subtarget->isThumb() && Subtarget->hasThumb2()) - NewOpc = ARM::t2STREXD; + unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), Ops.size()); @@ -3295,7 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { // Form a REG_SEQUENCE to force register allocation. SDValue V0 = N->getOperand(0); SDValue V1 = N->getOperand(1); - SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); SmallVector<SDValue, 6> Ops; Ops.push_back(RegSeq); @@ -3325,11 +3437,152 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectAtomic64(N, ARM::ATOMSWAP6432); case ARMISD::ATOMCMPXCHG64_DAG: return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); + + case ARMISD::ATOMMIN64_DAG: + return SelectAtomic64(N, ARM::ATOMMIN6432); + case ARMISD::ATOMUMIN64_DAG: + return SelectAtomic64(N, ARM::ATOMUMIN6432); + case ARMISD::ATOMMAX64_DAG: + return SelectAtomic64(N, ARM::ATOMMAX6432); + case ARMISD::ATOMUMAX64_DAG: + return SelectAtomic64(N, ARM::ATOMUMAX6432); } return SelectCode(N); } +SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ + std::vector<SDValue> AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>( + N->getOperand(InlineAsm::Op_AsmString)); + StringRef AsmString = StringRef(S->getSymbol()); + + // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. + // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require + // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs + // respectively. Since there is no constraint to explicitly specify a + // reg pair, we search %H operand inside the asm string. If it is found, the + // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. + if (AsmString.find(":H}") == StringRef::npos) + return NULL; + + DebugLoc dl = N->getDebugLoc(); + SDValue Glue = N->getOperand(NumOps-1); + + // Glue node will be appended late. + for(unsigned i = 0; i < NumOps -1; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + continue; + + assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); + unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. + + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); + GU = T1.getNode(); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first. + SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. + i += 2; + } + } + + AsmNodeOperands.push_back(Glue); + if (!Changed) + return NULL; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], + AsmNodeOperands.size()); + New->setNodeId(-1); + return New.getNode(); +} + + bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { |