diff options
Diffstat (limited to 'contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp | 848 |
1 files changed, 529 insertions, 319 deletions
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index b4f4b1b..1932e74 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -35,6 +35,18 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; +// If I is a shifted mask, set the size (Size) and the first bit of the +// mask (Pos), and return true. +// For example, if I is 0x003ff800, (Pos, Size) = (11, 11). +static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { + if (!isUInt<32>(I) || !isShiftedMask_32(I)) + return false; + + Size = CountPopulation_32(I); + Pos = CountTrailingZeros_32(I); + return true; +} + const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { case MipsISD::JmpLink: return "MipsISD::JmpLink"; @@ -61,27 +73,38 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC"; case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; + case MipsISD::Sync: return "MipsISD::Sync"; + case MipsISD::Ext: return "MipsISD::Ext"; + case MipsISD::Ins: return "MipsISD::Ins"; default: return NULL; } } MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) - : TargetLowering(TM, new MipsTargetObjectFile()) { - Subtarget = &TM.getSubtarget<MipsSubtarget>(); + : TargetLowering(TM, new MipsTargetObjectFile()), + Subtarget(&TM.getSubtarget<MipsSubtarget>()), + HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) { // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // Set up the register classes addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass); addRegisterClass(MVT::f32, Mips::FGR32RegisterClass); + if (HasMips64) + addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass); + // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) - if (!Subtarget->isFP64bit()) + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, Mips::FGR64RegisterClass); + else addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass); + } // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); @@ -100,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); @@ -115,6 +139,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); // Operations not directly supported by Mips. setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -126,10 +154,14 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); - if (!Subtarget->isMips32r2()) + if (!Subtarget->hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); + if (!Subtarget->hasMips64r2()) + setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); @@ -159,7 +191,14 @@ MipsTargetLowering(MipsTargetMachine &TM) // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + + setInsertFencesForAtomic(true); if (Subtarget->isSingleFloat()) setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); @@ -180,6 +219,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); setMinFunctionAlignment(2); @@ -190,7 +231,12 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionSelectorRegister(Mips::A1); } -MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const { +bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { + MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16; +} + +EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -348,7 +394,7 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->isMips32() && SelectMadd(N, &DAG)) + if (Subtarget->hasMips32() && SelectMadd(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -360,7 +406,7 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->isMips32() && SelectMsub(N, &DAG)) + if (Subtarget->hasMips32() && SelectMsub(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -372,6 +418,9 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); + EVT Ty = N->getValueType(0); + unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; + unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem : MipsISD::DivRemU; DebugLoc dl = N->getDebugLoc(); @@ -383,7 +432,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, // insert MFLO if (N->hasAnyUseOfValue(0)) { - SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32, + SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); InChain = CopyFromLo.getValue(1); @@ -393,7 +442,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, // insert MFHI if (N->hasAnyUseOfValue(1)) { SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl, - Mips::HI, MVT::i32, InGlue); + HI, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); } @@ -490,6 +539,101 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG, return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc()); } +static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + // Pattern match EXT. + // $dst = and ((sra or srl) $src , pos), (2**size - 1) + // => ext $dst, $src, size, pos + if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2()) + return SDValue(); + + SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1); + + // Op's first operand must be a shift right. + if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL) + return SDValue(); + + // The second operand of the shift must be an immediate. + uint64_t Pos; + ConstantSDNode *CN; + if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1)))) + return SDValue(); + + Pos = CN->getZExtValue(); + + uint64_t SMPos, SMSize; + // Op's second operand must be a shifted mask. + if (!(CN = dyn_cast<ConstantSDNode>(Mask)) || + !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize)) + return SDValue(); + + // Return if the shifted mask does not start at bit 0 or the sum of its size + // and Pos exceeds the word's size. + if (SMPos != 0 || Pos + SMSize > 32) + return SDValue(); + + return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32, + ShiftRight.getOperand(0), + DAG.getConstant(Pos, MVT::i32), + DAG.getConstant(SMSize, MVT::i32)); +} + +static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + // Pattern match INS. + // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), + // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 + // => ins $dst, $src, size, pos, $src1 + if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2()) + return SDValue(); + + SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); + uint64_t SMPos0, SMSize0, SMPos1, SMSize1; + ConstantSDNode *CN; + + // See if Op's first operand matches (and $src1 , mask0). + if (And0.getOpcode() != ISD::AND) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) || + !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0)) + return SDValue(); + + // See if Op's second operand matches (and (shl $src, pos), mask1). + if (And1.getOpcode() != ISD::AND) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) || + !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) + return SDValue(); + + // The shift masks must have the same position and size. + if (SMPos0 != SMPos1 || SMSize0 != SMSize1) + return SDValue(); + + SDValue Shl = And1.getOperand(0); + if (Shl.getOpcode() != ISD::SHL) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1)))) + return SDValue(); + + unsigned Shamt = CN->getZExtValue(); + + // Return if the shift amount and the first bit position of mask are not the + // same. + if (Shamt != SMPos0) + return SDValue(); + + return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32, + Shl.getOperand(0), + DAG.getConstant(SMPos0, MVT::i32), + DAG.getConstant(SMSize0, MVT::i32), + And0.getOperand(0)); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -506,6 +650,10 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return PerformDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); + case ISD::AND: + return PerformANDCombine(N, DAG, DCI, Subtarget); + case ISD::OR: + return PerformORCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -527,6 +675,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); } return SDValue(); } @@ -733,13 +883,13 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - unsigned Dest = MI->getOperand(0).getReg(); + unsigned OldVal = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); unsigned Incr = MI->getOperand(2).getReg(); - unsigned Oldval = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned AndRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -758,61 +908,38 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // thisMBB: // ... - // sw incr, fi(sp) // store incr to stack (when BinOpcode == 0) // fallthrough --> loopMBB - - // Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before - // the loop and then loading it from stack in block loopMBB is necessary to - // prevent MachineLICM pass to hoist "or" instruction out of the block - // loopMBB. - - int fi = 0; - if (BinOpcode == 0 && !Nand) { - // Get or create a temporary stack location. - MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr).addFrameIndex(fi).addImm(0); - } BB->addSuccessor(loopMBB); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); // loopMBB: // ll oldval, 0(ptr) - // or dest, $0, oldval - // <binop> tmp1, oldval, incr - // sc tmp1, 0(ptr) - // beq tmp1, $0, loopMBB + // <binop> storeval, oldval, incr + // sc success, storeval, 0(ptr) + // beq success, $0, loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0); if (Nand) { - // and tmp2, oldval, incr - // nor tmp1, $0, tmp2 - BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr); - BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); + // and andres, oldval, incr + // nor storeval, $0, andres + BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr); + BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal) + .addReg(Mips::ZERO).addReg(AndRes); } else if (BinOpcode) { - // <binop> tmp1, oldval, incr - BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr); + // <binop> storeval, oldval, incr + BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); } else { - // lw tmp2, fi(sp) // load incr from stack - // or tmp1, $zero, tmp2 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); + StoreVal = Incr; } - BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB); - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } MachineBasicBlock * @@ -833,33 +960,34 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, unsigned Ptr = MI->getOperand(1).getReg(); unsigned Incr = MI->getOperand(2).getReg(); - unsigned Addr = RegInfo.createVirtualRegister(RC); - unsigned Shift = RegInfo.createVirtualRegister(RC); + unsigned AlignedAddr = RegInfo.createVirtualRegister(RC); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned Newval = RegInfo.createVirtualRegister(RC); - unsigned Oldval = RegInfo.createVirtualRegister(RC); + unsigned NewVal = RegInfo.createVirtualRegister(RC); + unsigned OldVal = RegInfo.createVirtualRegister(RC); unsigned Incr2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp3 = RegInfo.createVirtualRegister(RC); - unsigned Tmp4 = RegInfo.createVirtualRegister(RC); - unsigned Tmp5 = RegInfo.createVirtualRegister(RC); - unsigned Tmp6 = RegInfo.createVirtualRegister(RC); - unsigned Tmp7 = RegInfo.createVirtualRegister(RC); - unsigned Tmp8 = RegInfo.createVirtualRegister(RC); - unsigned Tmp9 = RegInfo.createVirtualRegister(RC); - unsigned Tmp10 = RegInfo.createVirtualRegister(RC); - unsigned Tmp11 = RegInfo.createVirtualRegister(RC); - unsigned Tmp12 = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned AndRes = RegInfo.createVirtualRegister(RC); + unsigned BinOpRes = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); + unsigned SrlRes = RegInfo.createVirtualRegister(RC); + unsigned SllRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = BB; ++It; MF->insert(It, loopMBB); + MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -868,111 +996,104 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(loopMBB); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(sinkMBB); + sinkMBB->addSuccessor(exitMBB); + // thisMBB: - // addiu tmp1,$0,-4 # 0xfffffffc - // and addr,ptr,tmp1 - // andi tmp2,ptr,3 - // sll shift,tmp2,3 - // ori tmp3,$0,255 # 0xff - // sll mask,tmp3,shift + // addiu masklsb2,$0,-4 # 0xfffffffc + // and alignedaddr,ptr,masklsb2 + // andi ptrlsb2,ptr,3 + // sll shiftamt,ptrlsb2,3 + // ori maskupper,$0,255 # 0xff + // sll mask,maskupper,shiftamt // nor mask2,$0,mask - // andi tmp4,incr,255 - // sll incr2,tmp4,shift - // sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0) - - // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before - // the loop and then loading it from stack in block loopMBB is necessary to - // prevent MachineLICM pass to hoist "or" instruction out of the block - // loopMBB. + // sll incr2,incr,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift); + BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + .addReg(Mips::ZERO).addImm(-4); + BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + .addReg(Mips::ZERO).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + .addReg(ShiftAmt).addReg(MaskUpper); BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - if (BinOpcode != Mips::SUBu) { - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift); - } else { - BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift); - } + BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr); - int fi = 0; - if (BinOpcode == 0 && !Nand) { - // Get or create a temporary stack location. - MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr2).addFrameIndex(fi).addImm(0); - } - BB->addSuccessor(loopMBB); + // atomic.load.binop + // loopMBB: + // ll oldval,0(alignedaddr) + // binop binopres,oldval,incr2 + // and newval,binopres,mask + // and maskedoldval0,oldval,mask2 + // or storeval,maskedoldval0,newval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loopMBB + + // atomic.swap // loopMBB: - // ll oldval,0(addr) - // binop tmp7,oldval,incr2 - // and newval,tmp7,mask - // and tmp8,oldval,mask2 - // or tmp9,tmp8,newval - // sc tmp9,0(addr) - // beq tmp9,$0,loopMBB + // ll oldval,0(alignedaddr) + // and newval,incr2,mask + // and maskedoldval0,oldval,mask2 + // or storeval,maskedoldval0,newval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loopMBB + BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); if (Nand) { - // and tmp6, oldval, incr2 - // nor tmp7, $0, tmp6 - BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2); - BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6); - } else if (BinOpcode == Mips::SUBu) { - // addu tmp7, oldval, incr2 - BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2); + // and andres, oldval, incr2 + // nor binopres, $0, andres + // and newval, binopres, mask + BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2); + BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes) + .addReg(Mips::ZERO).addReg(AndRes); + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); } else if (BinOpcode) { - // <binop> tmp7, oldval, incr2 - BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2); - } else { - // lw tmp6, fi(sp) // load incr2 from stack - // or tmp7, $zero, tmp6 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6); + // <binop> binopres, oldval, incr2 + // and newval, binopres, mask + BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2); + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); + } else {// atomic.swap + // and newval, incr2, mask + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask); } - BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0); + + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + .addReg(OldVal).addReg(Mask2); + BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + .addReg(MaskedOldVal0).addReg(NewVal); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB); - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // and tmp10,oldval,mask - // srl tmp11,tmp10,shift - // sll tmp12,tmp11,24 - // sra dest,tmp12,24 - BB = exitMBB; + .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); + + // sinkMBB: + // and maskedoldval1,oldval,mask + // srl srlres,maskedoldval1,shiftamt + // sll sllres,srlres,24 + // sra dest,sllres,24 + BB = sinkMBB; int64_t ShiftImm = (Size == 1) ? 24 : 16; - // reverse order - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest) - .addReg(Tmp12).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12) - .addReg(Tmp11).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11) - .addReg(Tmp10).addReg(Shift); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10) - .addReg(Oldval).addReg(Mask); + + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + .addReg(OldVal).addReg(Mask); + BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes) + .addReg(ShiftAmt).addReg(MaskedOldVal1); + BuildMI(BB, dl, TII->get(Mips::SLL), SllRes) + .addReg(SrlRes).addImm(ShiftImm); + BuildMI(BB, dl, TII->get(Mips::SRA), Dest) + .addReg(SllRes).addImm(ShiftImm); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } MachineBasicBlock * @@ -989,11 +1110,10 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); - unsigned Oldval = MI->getOperand(2).getReg(); - unsigned Newval = MI->getOperand(3).getReg(); + unsigned OldVal = MI->getOperand(2).getReg(); + unsigned NewVal = MI->getOperand(3).getReg(); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -1012,26 +1132,14 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - // Get or create a temporary stack location. - MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - int fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - // thisMBB: // ... - // sw newval, fi(sp) // store newval to stack // fallthrough --> loop1MBB - - // Note: storing newval to stack before the loop and then loading it from - // stack in block loop2MBB is necessary to prevent MachineLICM pass to - // hoist "or" instruction out of the block loop2MBB. - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Newval).addFrameIndex(fi).addImm(0); BB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(exitMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(exitMBB); // loop1MBB: // ll dest, 0(ptr) @@ -1039,27 +1147,20 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, BB = loop1MBB; BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BNE)) - .addReg(Dest).addReg(Oldval).addMBB(exitMBB); - BB->addSuccessor(exitMBB); - BB->addSuccessor(loop2MBB); + .addReg(Dest).addReg(OldVal).addMBB(exitMBB); // loop2MBB: - // lw tmp2, fi(sp) // load newval from stack - // or tmp1, $0, tmp2 - // sc tmp1, 0(ptr) - // beq tmp1, $0, loop1MBB + // sc success, newval, 0(ptr) + // beq success, $0, loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(NewVal).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } MachineBasicBlock * @@ -1077,36 +1178,39 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); - unsigned Oldval = MI->getOperand(2).getReg(); - unsigned Newval = MI->getOperand(3).getReg(); + unsigned CmpVal = MI->getOperand(2).getReg(); + unsigned NewVal = MI->getOperand(3).getReg(); - unsigned Addr = RegInfo.createVirtualRegister(RC); - unsigned Shift = RegInfo.createVirtualRegister(RC); + unsigned AlignedAddr = RegInfo.createVirtualRegister(RC); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned Oldval2 = RegInfo.createVirtualRegister(RC); - unsigned Oldval3 = RegInfo.createVirtualRegister(RC); - unsigned Oldval4 = RegInfo.createVirtualRegister(RC); - unsigned Newval2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp3 = RegInfo.createVirtualRegister(RC); - unsigned Tmp4 = RegInfo.createVirtualRegister(RC); - unsigned Tmp5 = RegInfo.createVirtualRegister(RC); - unsigned Tmp6 = RegInfo.createVirtualRegister(RC); - unsigned Tmp7 = RegInfo.createVirtualRegister(RC); - unsigned Tmp8 = RegInfo.createVirtualRegister(RC); - unsigned Tmp9 = RegInfo.createVirtualRegister(RC); + unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned OldVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); + unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned SrlRes = RegInfo.createVirtualRegister(RC); + unsigned SllRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = BB; ++It; MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); + MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -1115,76 +1219,90 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(sinkMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(sinkMBB); + sinkMBB->addSuccessor(exitMBB); + + // FIXME: computation of newval2 can be moved to loop2MBB. // thisMBB: - // addiu tmp1,$0,-4 # 0xfffffffc - // and addr,ptr,tmp1 - // andi tmp2,ptr,3 - // sll shift,tmp2,3 - // ori tmp3,$0,255 # 0xff - // sll mask,tmp3,shift + // addiu masklsb2,$0,-4 # 0xfffffffc + // and alignedaddr,ptr,masklsb2 + // andi ptrlsb2,ptr,3 + // sll shiftamt,ptrlsb2,3 + // ori maskupper,$0,255 # 0xff + // sll mask,maskupper,shiftamt // nor mask2,$0,mask - // andi tmp4,oldval,255 - // sll oldval2,tmp4,shift - // andi tmp5,newval,255 - // sll newval2,tmp5,shift + // andi maskedcmpval,cmpval,255 + // sll shiftedcmpval,maskedcmpval,shiftamt + // andi maskednewval,newval,255 + // sll shiftednewval,maskednewval,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift); + BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + .addReg(Mips::ZERO).addImm(-4); + BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + .addReg(Mips::ZERO).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + .addReg(ShiftAmt).addReg(MaskUpper); BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift); - BB->addSuccessor(loop1MBB); + BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal) + .addReg(CmpVal).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal) + .addReg(ShiftAmt).addReg(MaskedCmpVal); + BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal) + .addReg(NewVal).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal) + .addReg(ShiftAmt).addReg(MaskedNewVal); // loop1MBB: - // ll oldval3,0(addr) - // and oldval4,oldval3,mask - // bne oldval4,oldval2,exitMBB + // ll oldval,0(alginedaddr) + // and maskedoldval0,oldval,mask + // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + .addReg(OldVal).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::BNE)) - .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB); - BB->addSuccessor(exitMBB); - BB->addSuccessor(loop2MBB); + .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); // loop2MBB: - // and tmp6,oldval3,mask2 - // or tmp7,tmp6,newval2 - // sc tmp7,0(addr) - // beq tmp7,$0,loop1MBB + // and maskedoldval1,oldval,mask2 + // or storeval,maskedoldval1,shiftednewval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp7) - .addReg(Tmp7).addReg(Addr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + .addReg(OldVal).addReg(Mask2); + BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + .addReg(MaskedOldVal1).addReg(ShiftedNewVal); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); - // exitMBB: - // srl tmp8,oldval4,shift - // sll tmp9,tmp8,24 - // sra dest,tmp9,24 - BB = exitMBB; + // sinkMBB: + // srl srlres,maskedoldval0,shiftamt + // sll sllres,srlres,24 + // sra dest,sllres,24 + BB = sinkMBB; int64_t ShiftImm = (Size == 1) ? 24 : 16; - // reverse order - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest) - .addReg(Tmp9).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9) - .addReg(Tmp8).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8) - .addReg(Oldval4).addReg(Shift); + + BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes) + .addReg(ShiftAmt).addReg(MaskedOldVal0); + BuildMI(BB, dl, TII->get(Mips::SLL), SllRes) + .addReg(SrlRes).addImm(ShiftImm); + BuildMI(BB, dl, TII->get(Mips::SRA), Dest) + .addReg(SllRes).addImm(ShiftImm); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } //===----------------------------------------------------------------------===// @@ -1267,9 +1385,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDVTList VTs = DAG.getVTList(MVT::i32); MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering(); @@ -1292,21 +1410,26 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_GOT); - GA = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, GA); - SDValue ResNode = DAG.getLoad(MVT::i32, dl, + EVT ValTy = Op.getValueType(); + bool HasGotOfst = (GV->hasInternalLinkage() || + (GV->hasLocalLinkage() && !isa<Function>(GV))); + unsigned GotFlag = IsN64 ? + (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) : + MipsII::MO_GOT; + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag); + GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA); + SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA, MachinePointerInfo(), false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. - if (!GV->hasInternalLinkage() && - (!GV->hasLocalLinkage() || isa<Function>(GV))) + if (!HasGotOfst) return ResNode; - SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_ABS_LO); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); - return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, + IsN64 ? MipsII::MO_GOT_OFST : + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo); + return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo); } SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, @@ -1361,11 +1484,11 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); + Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); std::pair<SDValue, SDValue> CallResult = LowerCallTo(DAG.getEntryNode(), - (const Type *) Type::getInt32Ty(*DAG.getContext()), + (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); @@ -1557,6 +1680,25 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// TODO: set SType according to the desired memory barrier behavior. +SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op, + SelectionDAG& DAG) const { + unsigned SType = 0; + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(SType, MVT::i32)); +} + +SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG& DAG) const { + // FIXME: Need pseudo-fence for 'singlethread' fences + // FIXME: Set SType for weaker fences where supported/appropriate. + unsigned SType = 0; + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(SType, MVT::i32)); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1679,55 +1821,109 @@ static const unsigned O32IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; +// Return next O32 integer argument register. +static unsigned getNextIntArgReg(unsigned Reg) { + assert((Reg == Mips::A0) || (Reg == Mips::A2)); + return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; +} + // Write ByVal Arg to arg registers and stack. static void -WriteByValArg(SDValue& Chain, DebugLoc dl, +WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass, SmallVector<SDValue, 8>& MemOpChains, int& LastFI, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, - MVT PtrType) { - unsigned FirstWord = VA.getLocMemOffset() / 4; - unsigned NumWords = (Flags.getByValSize() + 3) / 4; - unsigned LastWord = FirstWord + NumWords; - unsigned CurWord; - - // copy the first 4 words of byval arg to registers A0 - A3 - for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize); - ++CurWord) { + MVT PtrType, bool isLittle) { + unsigned LocMemOffset = VA.getLocMemOffset(); + unsigned Offset = 0; + uint32_t RemainingSize = Flags.getByValSize(); + unsigned ByValAlign = Flags.getByValAlign(); + + // Copy the first 4 words of byval arg to registers A0 - A3. + // FIXME: Use a stricter alignment if it enables better optimization in passes + // run later. + for (; RemainingSize >= 4 && LocMemOffset < 4 * 4; + Offset += 4, RemainingSize -= 4, LocMemOffset += 4) { SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant((CurWord - FirstWord) * 4, - MVT::i32)); + DAG.getConstant(Offset, MVT::i32)); SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, MachinePointerInfo(), - false, false, 0); + false, false, std::min(ByValAlign, + (unsigned )4)); MemOpChains.push_back(LoadVal.getValue(1)); - unsigned DstReg = O32IntRegs[CurWord]; + unsigned DstReg = O32IntRegs[LocMemOffset / 4]; RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); } - // copy remaining part of byval arg to stack. - if (CurWord < LastWord) { - unsigned SizeInBytes = (LastWord - CurWord) * 4; - SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant((CurWord - FirstWord) * 4, - MVT::i32)); - LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); - Chain = DAG.getMemcpy(Chain, dl, Dst, Src, - DAG.getConstant(SizeInBytes, MVT::i32), - /*Align*/4, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - MemOpChains.push_back(Chain); + if (RemainingSize == 0) + return; + + // If there still is a register available for argument passing, write the + // remaining part of the structure to it using subword loads and shifts. + if (LocMemOffset < 4 * 4) { + assert(RemainingSize <= 3 && RemainingSize >= 1 && + "There must be one to three bytes remaining."); + unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize); + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + unsigned Alignment = std::min(ByValAlign, (unsigned )4); + SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + LoadPtr, MachinePointerInfo(), + MVT::getIntegerVT(LoadSize * 8), false, + false, Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + + // If target is big endian, shift it to the most significant half-word or + // byte. + if (!isLittle) + LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal, + DAG.getConstant(32 - LoadSize * 8, MVT::i32)); + + Offset += LoadSize; + RemainingSize -= LoadSize; + + // Read second subword if necessary. + if (RemainingSize != 0) { + assert(RemainingSize == 1 && "There must be one byte remaining."); + LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + unsigned Alignment = std::min(ByValAlign, (unsigned )2); + SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + LoadPtr, MachinePointerInfo(), + MVT::i8, false, false, Alignment); + MemOpChains.push_back(Subword.getValue(1)); + // Insert the loaded byte to LoadVal. + // FIXME: Use INS if supported by target. + unsigned ShiftAmt = isLittle ? 16 : 8; + SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword, + DAG.getConstant(ShiftAmt, MVT::i32)); + LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift); + } + + unsigned DstReg = O32IntRegs[LocMemOffset / 4]; + RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); + return; } + + // Create a fixed object on stack at offset LocMemOffset and copy + // remaining part of byval arg to it using memcpy. + SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); + ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, + DAG.getConstant(RemainingSize, MVT::i32), + std::min(ByValAlign, (unsigned)4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); } /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isTailCall. SDValue -MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, +MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -1757,8 +1953,13 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NextStackOffset, - true)); + // Chain is the output chain of the last Load/Store or CopyToReg node. + // ByValChain is the output chain of the last Memcpy node created for copying + // byval arguments to the stack. + SDValue Chain, CallSeqStart, ByValChain; + SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); + Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); + ByValChain = InChain; // If this is the first call, create a stack frame object that points to // a location to which .cprestore saves $gp. @@ -1818,8 +2019,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Arg, DAG.getConstant(1, MVT::i32)); if (!Subtarget->isLittle()) std::swap(Lo, Hi); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); - RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi)); + unsigned LocRegLo = VA.getLocReg(); + unsigned LocRegHigh = getNextIntArgReg(LocRegLo); + RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); + RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); continue; } } @@ -1852,8 +2055,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, "No support for ByVal args by ABIs other than O32 yet."); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); - WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, - VA, Flags, getPointerTy()); + WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, + DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; } @@ -1875,6 +2078,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (LastFI) MipsFI->extendOutArgFIRange(FirstFI, LastFI); + // If a memcpy has been created to copy a byval arg to a stack, replace the + // chain input of CallSeqStart with ByValChain. + if (InChain != ByValChain) + DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, + NextStackOffsetVal); + // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) @@ -2071,12 +2280,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (RegVT == MVT::i32) RC = Mips::CPURegsRegisterClass; + else if (RegVT == MVT::i64) + RC = Mips::CPU64RegsRegisterClass; else if (RegVT == MVT::f32) RC = Mips::FGR32RegisterClass; - else if (RegVT == MVT::f64) { - if (!Subtarget->isSingleFloat()) - RC = Mips::AFGR64RegisterClass; - } else + else if (RegVT == MVT::f64) + RC = HasMips64 ? Mips::FGR64RegisterClass : Mips::AFGR64RegisterClass; + else llvm_unreachable("RegVT not supported by FormalArguments Lowering"); // Transform the arguments stored on @@ -2105,7 +2315,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue); if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) { unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), - VA.getLocReg()+1, RC); + getNextIntArgReg(ArgReg), RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); if (!Subtarget->isLittle()) std::swap(ArgValue, ArgValue2); @@ -2313,7 +2523,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: |