author     dim <dim@FreeBSD.org>  2011-02-26 22:03:50 +0000
committer  dim <dim@FreeBSD.org>  2011-02-26 22:03:50 +0000
commit     c80ac9d286b8fcc6d1ee5d76048134cf80aa9edc
tree       ddf53b8bd9235bcb0b8aae16c5e22310dcdad665 /lib/Target
parent     cbb70ce070d220642b038ea101d9c0f9fbf860d6
Vendor import of llvm trunk r126547:
http://llvm.org/svn/llvm-project/llvm/trunk@126547
Diffstat (limited to 'lib/Target')
50 files changed, 732 insertions, 405 deletions
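The most invasive ARM change in this import widens the instruction-domain field in TSFlags from two bits to three so that a Cortex-A8-only VFP/NEON overlap domain (DomainNEONA8) can be represented, and the hazard checks switch from equality tests to bitmask tests. A minimal standalone sketch of that encoding follows; the constants mirror the ARMBaseInstrInfo.h hunk below, but everything around them (the helper, main) is illustrative only and not part of the import:

#include <cassert>
#include <cstdint>

// Mirrors the widened encoding in the ARMBaseInstrInfo.h hunk below: the
// domain field grows from two bits to three so DomainNEONA8 fits.
namespace ARMII {
  enum {
    DomainShift   = 18,
    DomainMask    = 7 << DomainShift,   // was 3 << DomainShift
    DomainGeneral = 0 << DomainShift,
    DomainVFP     = 1 << DomainShift,
    DomainNEON    = 2 << DomainShift,
    DomainNEONA8  = 4 << DomainShift,   // new in this import
  };
}

// Domains now behave as a bitmask, not an enumeration: an instruction can
// belong to VFP and NEON at once, which is why the rewritten hazard checks
// below test with '&' instead of '=='.
static bool isVFPOrNEON(uint64_t TSFlags) {
  uint64_t Domain = TSFlags & ARMII::DomainMask;
  return (Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON);
}

int main() {
  uint64_t Flags = ARMII::DomainVFP | ARMII::DomainNEON; // == VFPNeonDomain
  assert(isVFPOrNEON(Flags));
  assert(!isVFPOrNEON(ARMII::DomainGeneral));
  return 0;
}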
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1fb8872..7e2183d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -155,10 +155,11 @@ namespace ARMII {
     //===------------------------------------------------------------------===//
     // Code domain.
     DomainShift   = 18,
-    DomainMask    = 3 << DomainShift,
+    DomainMask    = 7 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
     DomainNEON    = 2 << DomainShift,
+    DomainNEONA8  = 4 << DomainShift,

     //===------------------------------------------------------------------===//
     // Field shifts - such shifts are used to set field while generating
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9f29530..26f48b3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -172,6 +172,7 @@ class ARMFastISel : public FastISel {
     unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
     unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
     unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+    unsigned ARMSelectCallOp(const GlobalValue *GV);

     // Call handling routines.
   private:
@@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
   return true;
 }

+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+  // Depend our opcode for thumb on whether or not we're targeting an
+  // externally callable function. For libcalls we'll just pass a NULL GV
+  // in here.
+  bool isExternal = false;
+  if (!GV || GV->hasExternalLinkage()) isExternal = true;
+
+  // Darwin needs the r9 versions of the opcodes.
+  bool isDarwin = Subtarget->isTargetDarwin();
+  if (isThumb && isExternal) {
+    return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
+  } else if (isThumb) {
+    return isDarwin ? ARM::tBLr9 : ARM::tBL;
+  } else {
+    return isDarwin ? ARM::BLr9 : ARM::BL;
+  }
+}
+
 // A quick function that will emit a call for a named libcall in F with the
 // vector of passed arguments for the Instruction in I. We can assume that we
 // can emit a call for any libcall we can produce. This is an abridged version
@@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
-  unsigned CallOpc;
-  if(isThumb) {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+  unsigned CallOpc = ARMSelectCallOp(NULL);
+  if(isThumb)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc)))
           .addExternalSymbol(TLI.getLibcallName(Call));
-  } else {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+  else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc))
           .addExternalSymbol(TLI.getLibcallName(Call)));
-  }

   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
-  unsigned CallOpc;
+  unsigned CallOpc = ARMSelectCallOp(GV);
   // Explicitly adding the predicate here.
-  if(isThumb) {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+  if(isThumb)
+    // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc)))
           .addGlobalAddress(GV, 0, 0);
-  } else {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+  else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc))
           .addGlobalAddress(GV, 0, 0));
-  }

   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index f42c6db..68c33f0 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

   // Move past area 3.
-  if (DPRCSSize > 0) MBBI++;
+  if (DPRCSSize > 0) {
+    MBBI++;
+    // Since vpush register list cannot have gaps, there may be multiple vpush
+    // instructions in the prologue.
+    while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+      MBBI++;
+  }

   NumBytes = DPRCSOffset;
   if (NumBytes) {
@@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
       emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);

     // Increment past our save areas.
-    if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+    if (AFI->getDPRCalleeSavedAreaSize()) {
+      MBBI++;
+      // Since vpop register list cannot have gaps, there may be multiple vpop
+      // instructions in the epilogue.
+      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+        MBBI++;
+    }
     if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
   }
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 676b01e..e97ce50 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
   // FIXME: Detect integer instructions properly.
   const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = MI->getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
-      return false;
-  } else
+  if (TID.mayStore())
     return false;
-  return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  return false;
 }

 ScheduleHazardRecognizer::HazardType
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a506cff..f0d5a7d 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -126,6 +126,7 @@ public:
   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
@@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
   return true;
 }

+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+                                            SDValue &Offset) {
+  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+  if (AM != ISD::POST_INC)
+    return false;
+  Offset = N;
+  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+      Offset = CurDAG->getRegister(0, MVT::i32);
+  }
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                        SDValue &Offset, SDValue &Label) {
   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 1835ec0..ab9f9e1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
     RC = ARM::GPRRegisterClass;

   // Transform the arguments stored in physical registers into virtual ones.
-  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);

   SDValue ArgValue2;
@@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, 0);
   } else {
-    Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   }

@@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");

       // Transform the arguments in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
     }

@@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
         else
           RC = ARM::GPRRegisterClass;

-        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
+        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
   EVT SrcVT = Tmp1.getValueType();
-  bool F2IisFast = Subtarget->isCortexA9() ||
-    Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+    Tmp0.getOpcode() == ARMISD::VMOVDRR;
+  bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+  if (UseNEON) {
+    // Use VBSL to copy the sign bit.
+    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+                               DAG.getTargetConstant(EncodedVal, MVT::i32));
+    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+    if (VT == MVT::f64)
+      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+                         DAG.getConstant(32, MVT::i32));
+    else /*if (VT == MVT::f32)*/
+      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+    if (SrcVT == MVT::f32) {
+      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+      if (VT == MVT::f64)
+        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+                           DAG.getConstant(32, MVT::i32));
+    }
+    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+                                            MVT::i32);
+    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+    if (SrcVT == MVT::f32) {
+      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+                        DAG.getConstant(0, MVT::i32));
+    } else {
+      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+    }
+
+    return Res;
+  }

   // Bitcast operand 1 to i32.
   if (SrcVT == MVT::f64)
@@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
                        &Tmp1, 1).getValue(1);
   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);

-  // If float to int conversion isn't going to be super expensive, then simply
-  // or in the signbit.
-  if (F2IisFast) {
-    SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
-    SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
-    Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
-    if (VT == MVT::f32) {
-      Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
-                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
-      return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
-                         DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
-    }
-
-    // f64: Or the high part with signbit and then combine two parts.
-    Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
-                       &Tmp0, 1);
-    SDValue Lo = Tmp0.getValue(0);
-    SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
-    Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
-    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+  // Or in the signbit with integer operations.
+  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+  if (VT == MVT::f32) {
+    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
   }

-  // Remove the signbit of operand 0.
-  Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-
-  // If operand 1 signbit is one, then negate operand 0.
-  SDValue ARMcc;
-  SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
-                          ISD::SETLT, ARMcc, DAG, dl);
-  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+  // f64: Or the high part with signbit and then combine two parts.
+  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+                     &Tmp0, 1);
+  SDValue Lo = Tmp0.getValue(0);
+  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
 }

 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2897,7 +2927,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   }

   // Return LR, which contains the return address. Mark it an implicit live-in.
-  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
 }

diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 765cba4..359ac45 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>;
 def IndexModeUpd  : IndexMode<3>;

 // Instruction execution domain.
-class Domain<bits<2> val> {
-  bits<2> Value = val;
+class Domain<bits<3> val> {
+  bits<3> Value = val;
 }
 def GenericDomain   : Domain<0>;
 def VFPDomain       : Domain<1>; // Instructions in VFP domain only
 def NeonDomain      : Domain<2>; // Instructions in Neon domain only
 def VFPNeonDomain   : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8

 //===----------------------------------------------------------------------===//
 // ARM special operands.
@@ -249,7 +250,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
   let TSFlags{15-10} = Form;
   let TSFlags{16}    = isUnaryDataProc;
   let TSFlags{17}    = canXformTo16Bit;
-  let TSFlags{19-18} = D.Value;
+  let TSFlags{20-18} = D.Value;

   let Constraints = cstr;
   let Itinerary = itin;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c827ce3d..6e3fe2e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -561,7 +561,9 @@ def addrmode6 : Operand<i32>,
   let EncoderMethod = "getAddrMode6AddressOpValue";
 }

-def am6offset : Operand<i32> {
+def am6offset : Operand<i32>,
+                ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+                               [], [SDNPWantRoot]> {
   let PrintMethod = "printAddrMode6OffsetOperand";
   let MIOperandInfo = (ops GPR);
   let EncoderMethod = "getAddrMode6OffsetOpValue";
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 1e2e550..dc3d63e 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
           (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-
 // ...with address register writeback:
-class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+               PatFrag StoreOp, SDNode ExtractOp>
   : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
             (ins addrmode6:$Rn, am6offset:$Rm,
              DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
             "\\{$Vd[$lane]\\}, $Rn$Rm",
-            "$Rn.addr = $wb", []>;
+            "$Rn.addr = $wb",
+            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+                                    addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+  : VSTQLNWBPseudo<IIC_VST1lnu> {
+  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+                                        addrmode6:$addr, am6offset:$offset))];
+}

-def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+                             NEONvgetlaneu> {
   let Inst{7-5} = lane{2-0};
 }
-def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+                             NEONvgetlaneu> {
   let Inst{7-6} = lane{1-0};
   let Inst{4}   = Rn{5};
 }
-def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+                             extractelt> {
   let Inst{7}   = lane{0};
   let Inst{5-4} = Rn{5-4};
 }
-def VST1LNq8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

 // VST2LN : Vector Store (single 2-element structure from one lane)
 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 920c5c9..2990283 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -197,9 +197,9 @@ def VADDS  : ASbIn<0b11100, 0b11, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32,
"vadd", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSUBD : ADbI<0b11100, 0b11, 1, 0, @@ -211,9 +211,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VDIVD : ADbI<0b11101, 0b00, 0, 0, @@ -235,9 +235,9 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VNMULD : ADbI<0b11100, 0b10, 1, 0, @@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // Match reassociated forms only if not sign dependent rounding. @@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FIXME: Verify encoding after integrated assembler is working. @@ -286,9 +286,9 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // Defs = [FPSCR] @@ -305,9 +305,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fabs SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } let Defs = [FPSCR] in { @@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, let Inst{3-0} = 0b0000; let Inst{5} = 0; - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. 
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 // FIXME: Verify encoding after integrated assembler is working.
@@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;

-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]

@@ -423,9 +423,9 @@ def VNEGS  : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32,
                    "vneg", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fneg SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
@@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                 [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
   let Inst{7} = 1; // s32

-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                 [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
   let Inst{7} = 0; // u32

-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 // FP -> Int:
@@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
                                  [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit

-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
                                 [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit

-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
@@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
@@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
@@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
                                    SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
               Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
                                              SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
               Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
                                                  SPR:$Sdin))]>,
                RegConstraint<"$Sdin = $Sd">,
                Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
@@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
                [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm),
                                         SPR:$Sdin))]>,
                RegConstraint<"$Sdin = $Sd">,
                Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }

 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
@@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                    [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
                RegConstraint<"$Sn = $Sd"> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // neverHasSideEffects

diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0bd740c..1465984 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,

   // Materializable GVs (in JIT lazy compilation mode) do not require an extra
   // load from stub.
-  bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+  bool isDecl = GV->hasAvailableExternallyLinkage();
+  if (GV->isDeclaration() && !GV->isMaterializable())
+    isDecl = true;

   if (!isTargetDarwin()) {
     // Extra load is needed for all externally visible.
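The ARMHazardRecognizer.cpp hunk above and the MLxExpansionPass.cpp hunk just below apply the same rewrite to two copies of the RAW-hazard predicate. A condensed, self-contained model of the new shape follows; InstrModel and its fields are hypothetical stand-ins (the real code uses MachineInstr/TargetInstrDesc), and only the control flow is taken from the diff:

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the instruction properties the predicate needs.
struct InstrModel {
  bool mayStore;
  unsigned opcode;
  uint64_t domainBits;          // TSFlags & ARMII::DomainMask
  std::vector<unsigned> reads;  // registers this instruction reads
};

enum { DomainVFP = 1, DomainNEON = 2, VMOVRS = 100, VMOVRRD = 101 };

// Condensed form of the rewritten hasRAWHazard: stores never raise the
// hazard, the two VFP-to-core moves are exempted, and only VFP/NEON-domain
// instructions that actually read the defined register count.
static bool hasRAWHazard(unsigned DefReg, const InstrModel &MI) {
  if (MI.mayStore)
    return false;
  if (MI.opcode == VMOVRS || MI.opcode == VMOVRRD)
    return false;
  if ((MI.domainBits & DomainVFP) || (MI.domainBits & DomainNEON))
    return std::find(MI.reads.begin(), MI.reads.end(), DefReg) !=
           MI.reads.end();
  return false;
}

int main() {
  InstrModel Use{false, 42, DomainNEON, {7}};
  return hasRAWHazard(7, Use) ? 0 : 1;  // reads reg 7 -> hazard detected
}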
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f9e86eb..9a27e2f 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
 }

 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
   // FIXME: Detect integer instructions properly.
+  const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = TID.getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (TID.mayStore() || TID.mayLoad())
-      return false;
-  } else {
+  if (TID.mayStore())
     return false;
-  }
-
-  return MI->readsRegister(Reg, TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(Reg, TRI);
+  return false;
 }
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 97e54bf..965665c 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -35,6 +35,7 @@ namespace {
   private:
     const TargetRegisterInfo *TRI;
    const ARMBaseInstrInfo *TII;
+    bool isA8;

     typedef DenseMap<unsigned, const MachineInstr*> RegMap;

@@ -43,6 +44,11 @@ namespace {
   char NEONMoveFixPass::ID = 0;
 }

+static bool inNEONDomain(unsigned Domain, bool isA8) {
+  return (Domain & ARMII::DomainNEON) ||
+         (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
 bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
   RegMap Defs;
   bool Modified = false;
@@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
       Domain = ARMII::DomainNEON;
     }

-    if (Domain & ARMII::DomainNEON) {
+    if (inNEONDomain(Domain, isA8)) {
       // Convert VMOVD to VMOVDneon
       unsigned DestReg = MI->getOperand(0).getReg();

@@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {

   TRI = TM.getRegisterInfo();
   TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+  isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();

   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2f67257..9b1073b 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
 bool
 Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) const {
+  while (MBBI->isDebugValue()) {
+    ++MBBI;
+    if (MBBI == MBB.end())
+      return false;
+  }
+
   unsigned PredReg = 0;
   return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
 }
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 9137d65..c4f43ab 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -48,7 +48,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
   // Set up the TargetLowering object.
   //I am having problems with shr n i8 1
-  setShiftAmountType(MVT::i64);

   setBooleanContents(ZeroOrOneBooleanContent);

   addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index b429e9f..cb98f92 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -31,25 +31,25 @@ namespace llvm {

       /// GPRelHi/GPRelLo - These represent the high and low 16-bit
       /// parts of a global address respectively.
-      GPRelHi, GPRelLo,
+      GPRelHi, GPRelLo,

       /// RetLit - Literal Relocation of a Global
       RelLit,

       /// GlobalRetAddr - used to restore the return address
       GlobalRetAddr,
-
+
       /// CALL - Normal call.
       CALL,

       /// DIVCALL - used for special library calls for div and rem
       DivCall,
-
+
       /// return flag operand
       RET_FLAG,

       /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
-      /// corresponds to the COND_BRANCH pseudo instruction.
+      /// corresponds to the COND_BRANCH pseudo instruction.
       /// *PRC is the input register to compare to zero,
       /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
       /// DESTBB is the destination block to branch to, and INFLAG is
@@ -62,7 +62,9 @@ namespace llvm {

   class AlphaTargetLowering : public TargetLowering {
   public:
     explicit AlphaTargetLowering(TargetMachine &TM);
-
+
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
     /// getSetCCResultType - Get the SETCC result ValueType
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;

@@ -92,7 +94,7 @@ namespace llvm {
       ConstraintWeight getSingleConstraintMatchWeight(
         AsmOperandInfo &info, const char *constraint) const;

-      std::vector<unsigned>
+      std::vector<unsigned>
         getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                           EVT VT) const;
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index dd27d0a..7c80eec 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -41,7 +41,6 @@ using namespace llvm;

 BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
-  setShiftAmountType(MVT::i16);
   setBooleanContents(ZeroOrOneBooleanContent);
   setStackPointerRegisterToSaveRestore(BF::SP);
   setIntDivIsCheap(false);
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index 15a745f..102c830 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -32,6 +32,7 @@ namespace llvm {
   class BlackfinTargetLowering : public TargetLowering {
   public:
     BlackfinTargetLowering(TargetMachine &TM);
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
     virtual void ReplaceNodeResults(SDNode *N,
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index e6511d0..743a4d7 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -435,7 +435,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)

   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

-  setShiftAmountType(MVT::i32);
   setBooleanContents(ZeroOrNegativeOneBooleanContent);

   setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -1219,7 +1218,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
       FuncInfo->setVarArgsFrameIndex(
         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
-      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl);
+      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                    false, false, 0);
@@ -2190,7 +2189,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
 {
   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
   DebugLoc dl = Op.getDebugLoc();

-  EVT ShiftVT = TLI.getShiftAmountTy();
+  EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());

   assert(Op.getValueType() == MVT::i8);
   switch (Opc) {
@@ -3112,7 +3111,7 @@ SPUTargetLowering::getSingleConstraintMatchWeight(
   switch (*constraint) {
   default:
     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
-    break;
+    break;
   //FIXME: Seems like the supported constraint letters were just copied
   // from PPC, as the following doesn't correspond to the GCC docs.
   // I'm leaving it so until someone adds the corresponding lowering support.
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 95d44af..dd48d7b 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -109,6 +109,8 @@ namespace llvm {
     /// getSetCCResultType - Return the ValueType for ISD::SETCC
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;

+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
     //! Custom lowering hooks
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

@@ -179,9 +181,9 @@ namespace llvm {

     virtual bool isLegalICmpImmediate(int64_t Imm) const;

-    virtual bool isLegalAddressingMode(const AddrMode &AM,
+    virtual bool isLegalAddressingMode(const AddrMode &AM,
                                        const Type *Ty) const;
-
+
     /// After allocating this many registers, the allocator should feel
     /// register pressure. The value is a somewhat random guess, based on the
     /// number of non callee saved registers in the C calling convention.
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 2f40bfc..f39826b 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -907,7 +907,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,

       // Transform the arguments stored on
       // physical registers into virtual ones
-      unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl);
+      unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);

       // If this is an 8 or 16-bit value, it has been passed promoted
@@ -973,7 +973,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
       for (; Start <= End; ++Start, ++StackLoc) {
         unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
-        unsigned LiveReg = MF.addLiveIn(Reg, RC, dl);
+        unsigned LiveReg = MF.addLiveIn(Reg, RC);
         SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);

         int FI = MFI->CreateFixedObject(4, 0, true);
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 30ef4f5..a95d59c 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -77,10 +77,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
   // Division is expensive
   setIntDivIsCheap(false);

-  // Even if we have only 1 bit shift here, we can perform
-  // shifts of the whole bitwidth 1 bit per step.
-  setShiftAmountType(MVT::i8);
-
   setStackPointerRegisterToSaveRestore(MSP430::SPW);
   setBooleanContents(ZeroOrOneBooleanContent);
   setSchedulingPreference(Sched::Latency);
@@ -330,7 +326,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
       // Arguments passed in registers
       EVT RegVT = VA.getLocVT();
       switch (RegVT.getSimpleVT().SimpleTy) {
-      default:
+      default:
         {
 #ifndef NDEBUG
           errs() << "LowerFormalArguments Unhandled argument type: "
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 673c543..19c9eac 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -73,6 +73,8 @@ namespace llvm {
   public:
     explicit MSP430TargetLowering(MSP430TargetMachine &TM);

+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 8f623b8..70d00e4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -362,7 +362,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
   }

-  setShiftAmountType(MVT::i32);
   setBooleanContents(ZeroOrOneBooleanContent);

   if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
@@ -1597,7 +1596,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
     }

     // Transform the arguments stored in physical registers into virtual ones.
-    unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+    unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
     SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);

     InVals.push_back(ArgValue);
@@ -1689,7 +1688,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       // Get an existing live-in vreg, or add a new one.
       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
       if (!VReg)
-        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl);
+        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
       SDValue Store =
         DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1708,7 +1707,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       // Get an existing live-in vreg, or add a new one.
       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
       if (!VReg)
-        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl);
+        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
       SDValue Store =
         DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1872,7 +1871,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         InVals.push_back(FIN);
         if (ObjSize==1 || ObjSize==2) {
           if (GPR_idx != Num_GPR_Regs) {
-            unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+            unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
             SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
             SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                               MachinePointerInfo(),
@@ -1891,7 +1890,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
           // to memory.  ArgVal will be address of the beginning of
           // the object.
           if (GPR_idx != Num_GPR_Regs) {
-            unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+            unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
             int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
             SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
             SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -1914,7 +1913,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
     case MVT::i32:
       if (!isPPC64) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
           ++GPR_idx;
         } else {
@@ -1928,7 +1927,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       // FALLTHROUGH
     case MVT::i64:  // PPC64
       if (GPR_idx != Num_GPR_Regs) {
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

         if (ObjectVT == MVT::i32) {
@@ -1966,9 +1965,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         unsigned VReg;
         if (ObjectVT == MVT::f32)
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
         else
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++FPR_idx;
@@ -1986,7 +1985,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       // Note that vector arguments in registers don't reserve stack space,
       // except in varargs functions.
       if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl);
+        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         if (isVarArg) {
           while ((ArgOffset % 16) != 0) {
@@ -2064,9 +2063,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         unsigned VReg;
         if (isPPC64)
-          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
         else
-          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 80cab75..33daae9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -29,36 +29,36 @@ namespace llvm {
       /// FSEL - Traditional three-operand fsel node.
       ///
       FSEL,
-
+
      /// FCFID - The FCFID instruction, taking an f64 operand and producing
       /// and f64 value containing the FP representation of the integer that
       /// was temporarily in the f64 operand.
       FCFID,
-
-      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+
+      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
       /// operand, producing an f64 value containing the integer representation
       /// of that FP value.
       FCTIDZ, FCTIWZ,
-
+
       /// STFIWX - The STFIWX instruction.  The first operand is an input token
       /// chain, then an f64 value to store, then an address to store it to.
       STFIWX,
-
+
       // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
       // three v4f32 operands and producing a v4f32 result.
       VMADDFP, VNMSUBFP,
-
+
       /// VPERM - The PPC VPERM Instruction.
       ///
       VPERM,
-
+
       /// Hi/Lo - These represent the high and low 16-bit parts of a global
       /// address respectively.  These nodes have two operands, the first of
       /// which must be a TargetGlobalAddress, and the second of which must be a
       /// Constant.  Selected naively, these turn into 'lis G+C' and 'li G+C',
       /// though these are usually folded into other nodes.
       Hi, Lo,
-
+
       TOC_ENTRY,

       /// The following three target-specific nodes are used for calls through
@@ -80,37 +80,37 @@ namespace llvm {
       /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
       /// compute an allocation on the stack.
       DYNALLOC,
-
+
       /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
       /// at function entry, used for PIC code.
       GlobalBaseReg,
-
+
       /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
       /// shift amounts.  These nodes are generated by the multi-precision shift
       /// code.
       SRL, SRA, SHL,
-
+
       /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
       /// registers.
       EXTSW_32,

       /// CALL - A direct function call.
       CALL_Darwin, CALL_SVR4,
-
+
       /// NOP - Special NOP which follows 64-bit SVR4 calls.
       NOP,

       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
       /// MTCTR instruction.
       MTCTR,
-
+
       /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
       /// BCTRL instruction.
       BCTRL_Darwin, BCTRL_SVR4,
-
+
       /// Return with a flag operand, matched by 'blr'
       RET_FLAG,
-
+
       /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
       /// instructions.  This copies the bits corresponding to the specified
       /// CRREG into the resultant GPR.  Bits corresponding to other CR regs
@@ -122,20 +122,20 @@ namespace llvm {
       /// encoding for the OPC field to identify the compare.  For example, 838
       /// is VCMPGTSH.
       VCMP,
-
+
       /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
-      /// altivec VCMP*o instructions.  For lack of better number, we use the
+      /// altivec VCMP*o instructions.  For lack of better number, we use the
       /// opcode number encoding for the OPC field to identify the compare.  For
       /// example, 838 is VCMPGTSH.
       VCMPo,
-
+
       /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
       /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
       /// condition register to branch on, OPC is the branch opcode to use (e.g.
       /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
       /// an optional input flag argument.
       COND_BRANCH,
-
+
       // The following 5 instructions are used only as part of the
       // long double-to-int conversion sequence.

@@ -150,7 +150,7 @@ namespace llvm {
       MTFSB1,

       /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
-      /// rounding towards zero.  It has flags added so it won't move past the
+      /// rounding towards zero.  It has flags added so it won't move past the
       /// FPSCR-setting instructions.
       FADDRTZ,

@@ -174,14 +174,14 @@ namespace llvm {

       /// STD_32 - This is the STD instruction for use with "32-bit" registers.
       STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
-
-      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+
+      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
       /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
       /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
       /// i32.
-      STBRX,
-
-      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+      STBRX,
+
+      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
       /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
      /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
       /// or i32.
@@ -194,7 +194,7 @@ namespace llvm {
     /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
     /// VPKUHUM instruction.
     bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
-
+
     /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
     /// VPKUWUM instruction.
     bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
@@ -208,16 +208,16 @@ namespace llvm {
     /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
     bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary);
-
+
     /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
     /// amount, otherwise return -1.
     int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
-
+
     /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
     /// specifies a splat of a single element that is suitable for input to
     /// VSPLTB/VSPLTH/VSPLTW.
     bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
-
+
     /// isAllNegativeZeroVector - Returns true if all elements of build_vector
     /// are -0.0.
     bool isAllNegativeZeroVector(SDNode *N);
@@ -225,24 +225,26 @@ namespace llvm {
     /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
     /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
     unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
-
+
     /// get_VSPLTI_elt - If this is a build_vector of constants which can be
     /// formed by using a vspltis[bhw] instruction of the specified element
     /// size, return the constant being splatted.  The ByteSize field indicates
     /// the number of bytes of each element [124] -> [bhw].
     SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
   }
-
+
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &PPCSubTarget;
   public:
     explicit PPCTargetLowering(PPCTargetMachine &TM);
-
+
     /// getTargetNodeName() - This method returns the name of a target specific
     /// DAG node.
     virtual const char *getTargetNodeName(unsigned Opcode) const;

+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;

@@ -253,19 +255,19 @@ namespace llvm {
                                      SDValue &Offset,
                                      ISD::MemIndexedMode &AM,
                                      SelectionDAG &DAG) const;
-
+
     /// SelectAddressRegReg - Given the specified addressed, check to see if it
     /// can be represented as an indexed [r+r] operation.  Returns false if it
     /// can be more efficiently represented with [r+imm].
     bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                              SelectionDAG &DAG) const;
-
+
     /// SelectAddressRegImm - Returns true if the address N can be represented
     /// by a base register plus a signed 16-bit displacement [r+imm], and if it
     /// is not better represented as reg+reg.
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                              SelectionDAG &DAG) const;
-
+
     /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
     bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
@@ -277,7 +279,7 @@ namespace llvm {
     bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
                                   SelectionDAG &DAG) const;

-
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -289,10 +291,10 @@ namespace llvm {
                             SelectionDAG &DAG) const;

     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
+
     virtual void computeMaskedBitsForTargetNode(const SDValue Op,
                                                 const APInt &Mask,
-                                                APInt &KnownZero,
+                                                APInt &KnownZero,
                                                 APInt &KnownOne,
                                                 const SelectionDAG &DAG,
                                                 unsigned Depth = 0) const;
@@ -300,13 +302,13 @@ namespace llvm {
     virtual MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
-    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
                                         MachineBasicBlock *MBB, bool is64Bit,
                                         unsigned BinOpcode) const;
-    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
-                                                MachineBasicBlock *MBB,
+    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+                                                MachineBasicBlock *MBB,
                                                 bool is8bit, unsigned Opcode) const;
-
+
     ConstraintType getConstraintType(const std::string &Constraint) const;

     /// Examine constraint string and operand type and determine a weight value.
@@ -314,7 +316,7 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;

-    std::pair<unsigned, const TargetRegisterClass*>
+    std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
@@ -329,11 +331,11 @@ namespace llvm {
                                       char ConstraintLetter,
                                       std::vector<SDValue> &Ops,
                                       SelectionDAG &DAG) const;
-
+
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
-
+
     /// isLegalAddressImmediate - Return true if the integer value can be used
     /// as the offset of the target addressing mode for load / store of the
     /// given type.
@@ -344,7 +346,7 @@ namespace llvm {
     virtual bool isLegalAddressImmediate(GlobalValue *GV) const;

     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-
+
     /// getOptimalMemOpType - Returns the target specific optimal type for load
     /// and store operations as a result of memset, memcpy, and memmove
     /// lowering. If DstAlign is zero that means it's safe to destination
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4e14fbb..f85914b 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -254,6 +254,20 @@ unsigned long reverse(unsigned v) {

 //===---------------------------------------------------------------------===//

+[LOOP DELETION]
+
+We don't delete this output free loop, because trip count analysis doesn't
+realize that it is finite (if it were infinite, it would be undefined).  Not
+having this blocks Loop Idiom from matching strlen and friends.
+
+void foo(char *C) {
+  int x = 0;
+  while (*C)
+    ++x,++C;
+}
+
+//===---------------------------------------------------------------------===//
+
 [LOOP RECOGNITION]

 These idioms should be recognized as popcount (see PR1488):
@@ -287,6 +301,16 @@ unsigned int popcount(unsigned int input) {
   return count;
 }

+This should be recognized as CLZ:  rdar://8459039
+
+unsigned clz_a(unsigned a) {
+  int i;
+  for (i=0;i<32;i++)
+    if (a & (1<<(31-i)))
+      return i;
+  return 32;
+}
+
 This sort of thing should be added to the loop idiom pass.
//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index ee29275..4b12852 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -79,6 +79,7 @@ namespace { MachineBasicBlock::iterator findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot); + bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize); }; char Filler::ID = 0; @@ -91,6 +92,7 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { return new Filler(tm); } + /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// We assume there is only one delay slot per delayed instruction. /// @@ -112,6 +114,13 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP)); else MBB.splice(++J, &MBB, D); + unsigned structSize = 0; + if (needsUnimp(I, structSize)) { + MachineBasicBlock::iterator J = I; + ++J; //skip the delay filler. + BuildMI(MBB, ++J, I->getDebugLoc(), + TII->get(SP::UNIMP)).addImm(structSize); + } } return Changed; } @@ -287,6 +296,28 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB, { if (candidate == MBB.begin()) return false; + if (candidate->getOpcode() == SP::UNIMP) + return true; const TargetInstrDesc &prevdesc = (--candidate)->getDesc(); return prevdesc.hasDelaySlot(); } + +bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) +{ + if (!I->getDesc().isCall()) + return false; + + unsigned structSizeOpNum = 0; + switch (I->getOpcode()) { + default: llvm_unreachable("Unknown call opcode."); + case SP::CALL: structSizeOpNum = 1; break; + case SP::JMPLrr: + case SP::JMPLri: structSizeOpNum = 2; break; + } + + const MachineOperand &MO = I->getOperand(structSizeOpNum); + if (!MO.isImm()) + return false; + StructSize = MO.getImm(); + return true; +} diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 196b87d..70574c3 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -16,7 +16,9 @@ #include "SparcISelLowering.h" #include "SparcTargetMachine.h" #include "SparcMachineFunctionInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/Module.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -116,6 +118,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together with flags. 
Flag = Chain.getValue(1); } + + unsigned RetAddrOffset = 8; //Call Inst + Delay Slot // If the function returns a struct, copy the SRetReturnReg to I0 if (MF.getFunction()->hasStructRetAttr()) { SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>(); @@ -127,11 +131,16 @@ SparcTargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); if (MF.getRegInfo().liveout_empty()) MF.getRegInfo().addLiveOut(SP::I0); + RetAddrOffset = 12; // CallInst + Delay Slot + Unimp } + SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32); + if (Flag.getNode()) - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain); + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, + RetAddrOffsetNode, Flag); + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, + RetAddrOffsetNode); } /// LowerFormalArguments - V8 uses a very simple ABI, where all values are @@ -194,7 +203,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, false, false, 0); } else { unsigned loReg = MF.addLiveIn(NextVA.getLocReg(), - &SP::IntRegsRegClass, dl); + &SP::IntRegsRegClass); LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32); } SDValue WholeValue = @@ -393,6 +402,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector<SDValue, 8> MemOpChains; const unsigned StackOffset = 92; + bool hasStructRetAttr = false; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size(); i != e; @@ -433,6 +443,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); + hasStructRetAttr = true; continue; } @@ -546,6 +557,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0; + // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. 
@@ -559,6 +572,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); + if (hasStructRetAttr) + Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32)); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { unsigned Reg = RegsToPass[i].first; if (Reg >= SP::I0 && Reg <= SP::I7) @@ -600,7 +615,29 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, return Chain; } +unsigned +SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const +{ + const Function *CalleeFn = 0; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + CalleeFn = dyn_cast<Function>(G->getGlobal()); + } else if (ExternalSymbolSDNode *E = + dyn_cast<ExternalSymbolSDNode>(Callee)) { + const Function *Fn = DAG.getMachineFunction().getFunction(); + const Module *M = Fn->getParent(); + CalleeFn = M->getFunction(E->getSymbol()); + } + + if (!CalleeFn) + return 0; + assert(CalleeFn->hasStructRetAttr() && + "Callee does not have the StructRet attribute."); + + const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); + const Type *ElementTy = Ty->getElementType(); + return getTargetData()->getTypeAllocSize(ElementTy); +} //===----------------------------------------------------------------------===// // TargetLowering Implementation diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 849e401..7d02df8 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -101,6 +101,8 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + + unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 1072323..cf5c48f 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -124,7 +124,8 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def retflag : SDNode<"SPISD::RET_FLAG", SDTNone, +def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, [SDNPHasChain, SDNPOptInGlue]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, @@ -132,7 +133,7 @@ def flushw : SDNode<"SPISD::FLUSHW", SDTNone, def getPCX : Operand<i32> { let PrintMethod = "printGetPCX"; -} +} //===----------------------------------------------------------------------===// // SPARC Flag Conditions @@ -232,6 +233,9 @@ let hasSideEffects = 1, mayStore = 1 in { [(flushw)]>; } +def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val), + "unimp $val", []>; + // FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the // fpmover pass. let Predicates = [HasNoV9] in { // Only emit these in V8 mode. @@ -292,11 +296,13 @@ let usesCustomInserter = 1, Uses = [FCC] in { // Section A.3 - Synthetic Instructions, p. 
85 // special cases of JMPL: let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { - let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in - def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>; + let rd = O7.Num, rs1 = G0.Num in + def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %o7+$val", [(retflag simm13:$val)]>; - let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in - def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>; + let rd = I7.Num, rs1 = G0.Num in + def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %i7+$val", []>; } // Section B.1 - Load Integer Instructions, p. 90 diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index d694f2e..90939c3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -59,9 +59,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : // Compute derived properties from the register classes computeRegisterProperties(); - // Set shifts properties - setShiftAmountType(MVT::i64); - // Provide all sorts of operation actions setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 51d2df3..3019242 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -57,6 +57,8 @@ namespace llvm { public: explicit SystemZTargetLowering(SystemZTargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } + /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 1cac07a..8fe549b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -775,6 +775,19 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, delete &Op; } } + // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". + if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && + Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + if (Op.isMem() && Op.Mem.SegReg == 0 && + isa<MCConstantExpr>(Op.Mem.Disp) && + cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { + SMLoc Loc = Op.getEndLoc(); + Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); + delete &Op; + } + } // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to // "shift <op>".
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 691e2d7..f777756 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -168,16 +168,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, switch (insn.displacementSize) { default: break; - case 8: + case 1: type = TYPE_MOFFS8; break; - case 16: + case 2: type = TYPE_MOFFS16; break; - case 32: + case 4: type = TYPE_MOFFS32; break; - case 64: + case 8: type = TYPE_MOFFS64; break; } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 4f4fbcd..d0dc8b5 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -399,7 +399,7 @@ struct InternalInstruction { /* The segment override type */ SegmentOverride segmentOverride; - /* Sizes of various critical pieces of data */ + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; uint8_t displacementSize; diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index c10e170..abd1515 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1879,39 +1879,71 @@ _add32carry: //===---------------------------------------------------------------------===// -This: -char t(char c) { - return c/3; +The hot loop of 256.bzip2 contains code that looks a bit like this: + +int foo(char *P, char *Q, int x, int y) { + if (P[0] != Q[0]) + return P[0] < Q[0]; + if (P[1] != Q[1]) + return P[1] < Q[1]; + if (P[2] != Q[2]) + return P[2] < Q[2]; + return P[3] < Q[3]; } -Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer +In the real code, we get a lot more wrong than this. However, even in this +code we generate: -_t: ## @t - movslq %edi, %rax - imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB - shrq $32, %rcx - addl %ecx, %eax - movl %eax, %ecx - shrl $31, %ecx - shrl %eax - addl %ecx, %eax - movsbl %al, %eax +_foo: ## @foo +## BB#0: ## %entry + movb (%rsi), %al + movb (%rdi), %cl + cmpb %al, %cl + je LBB0_2 +LBB0_1: ## %if.then + cmpb %al, %cl + jmp LBB0_5 +LBB0_2: ## %if.end + movb 1(%rsi), %al + movb 1(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## BB#3: ## %if.end38 + movb 2(%rsi), %al + movb 2(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## BB#4: ## %if.end60 + movb 3(%rdi), %al + cmpb 3(%rsi), %al +LBB0_5: ## %if.end60 + setl %al + movzbl %al, %eax ret -GCC gets: +Note that we generate jumps to LBB0_1 which does a redundant compare. The +redundant compare also forces the register values to be live, which prevents +folding one of the loads into the compare. In contrast, GCC 4.2 produces: -_t: - movl $86, %eax - imulb %dil - shrw $8, %ax - sarb $7, %dil - subb %dil, %al - movsbl %al,%eax +_foo: + movzbl (%rsi), %eax + cmpb %al, (%rdi) + jne L10 +L12: + movzbl 1(%rsi), %eax + cmpb %al, 1(%rdi) + jne L10 + movzbl 2(%rsi), %eax + cmpb %al, 2(%rdi) + jne L10 + movzbl 3(%rdi), %eax + cmpb 3(%rsi), %al +L10: + setl %al + movzbl %al, %eax ret -which is nicer. This also happens for int, not just char. +which is "perfect". 
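As a reading aid (our rewrite, not part of the source, dropping the unused
x and y parameters), the shape GCC produces corresponds to funneling every
mismatch into one shared signed compare, which in C looks like:

int foo_tail(char *P, char *Q) {
  int i;
  // Find the first mismatching byte, or stop at index 3; every path
  // falls into the single P[i] < Q[i] compare below, so no byte is
  // compared at two separate sites.
  for (i = 0; i != 3; ++i)
    if (P[i] != Q[i])
      break;
  return P[i] < Q[i];
}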
//===---------------------------------------------------------------------===// - - diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9d42ac2..6fa9284 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -597,9 +597,13 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { (AM.Base.Reg != 0 || AM.IndexReg != 0)) return false; - // Can't handle TLS or DLLImport. + // Can't handle DLLImport. + if (GV->hasDLLImportLinkage()) + return false; + + // Can't handle TLS. if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) + if (GVar->isThreadLocal()) return false; // Okay, we've committed to selecting this global. Set up the basic address. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 27024b4..2f49dbc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,7 +45,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VectorExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -56,10 +55,6 @@ using namespace dwarf; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt<bool> -Disable256Bit("disable-256bit", cl::Hidden, - cl::desc("Disable use of 256-bit vectors")); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -225,7 +220,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird, it always uses i8 for shift amounts and setcc results. 
- setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); @@ -1713,7 +1707,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, else llvm_unreachable("Unknown argument type!"); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 @@ -1845,7 +1839,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass, dl); + X86::GR64RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1861,7 +1855,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<SDValue, 11> SaveXMMOps; SaveXMMOps.push_back(Chain); - unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl); + unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); @@ -1872,7 +1866,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs], - X86::VR128RegisterClass, dl); + X86::VR128RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); SaveXMMOps.push_back(Val); } @@ -2693,6 +2687,10 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -2760,6 +2758,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -4178,7 +4180,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, - DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); + DAG.getConstant(NumBits, + TLI.getShiftAmountTy(SrcOp.getValueType())))); } SDValue @@ -4327,16 +4330,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and // use shuffles to put them in place. - if (VT.getSizeInBits() > 256 && - Subtarget->hasAVX() && - !Disable256Bit && + if (VT.getSizeInBits() > 256 && + Subtarget->hasAVX() && !ISD::isBuildVectorAllZeros(Op.getNode())) { SmallVector<SDValue, 8> V; V.resize(NumElems); for (unsigned i = 0; i < NumElems; ++i) { V[i] = Op.getOperand(i); } - + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); // Build the lower subvector. 
@@ -5044,7 +5046,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DAG.getIntPtrConstant(Elt1 / 2)); if ((Elt1 & 1) == 0) InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt.getValueType()))); else if (Elt0 >= 0) InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt, DAG.getConstant(0xFF00, MVT::i16)); @@ -5058,7 +5061,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, Elt0Src, DAG.getIntPtrConstant(Elt0 / 2)); if ((Elt0 & 1) != 0) InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt0.getValueType()))); else if (Elt1 >= 0) InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0, DAG.getConstant(0x00FF, MVT::i16)); @@ -5475,7 +5479,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; - + if (CanFoldLoad) { if (HasSSE2 && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); @@ -6088,7 +6092,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue ScaledN2 = N2; if (Upper) ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, - DAG.getConstant(NumElems / + DAG.getConstant(NumElems / (VT.getSizeInBits() / 128), N2.getValueType())); Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, @@ -9327,6 +9331,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS"; + case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD"; + case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; + case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; @@ -11984,6 +11992,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 419da37..6ec4a7d 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -159,16 +159,16 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - + /// PANDN - and with not'd value. PANDN, - + /// PSIGNB/W/D - Copy integer sign. - PSIGNB, PSIGNW, PSIGND, - + PSIGNB, PSIGNW, PSIGND, + /// PBLENDVB - Variable blend PBLENDVB, - + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, @@ -212,7 +212,7 @@ namespace llvm { // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, - + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. 
@@ -248,6 +248,10 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, + VUNPCKLPS, + VUNPCKLPD, + VUNPCKLPSY, + VUNPCKLPDY, UNPCKHPS, UNPCKHPD, PUNPCKLBW, @@ -463,6 +467,8 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 344c14c..0660072 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -41,6 +41,8 @@ def MRM_F8 : Format<41>; def MRM_F9 : Format<42>; def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; +def MRM_D0 : Format<45>; +def MRM_D1 : Format<46>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ceb1b65..76a9b12 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -369,8 +369,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, @@ -568,6 +566,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL16rr, X86::IMUL16rm, 0 }, { X86::IMUL32rr, X86::IMUL32rm, 0 }, { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, 16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, { X86::MAXPSrr, X86::MAXPSrm, 16 }, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 1d44207..fcb5a25 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -311,6 +311,8 @@ namespace X86II { MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, + MRM_D0 = 45, + MRM_D1 = 46, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -577,6 +579,8 @@ namespace X86II { case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: + case X86II::MRM_D1: return -1; } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 87dc4be..f832a7c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1296,6 +1296,9 @@ def : MnemonicAlias<"lret", "lretl">; def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; + def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 1a58ba0..6a24d14 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -388,3 +388,8 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), 
"wbinvd", []>, TB; +let Defs = [RDX, RAX], Uses = [RCX] in + def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; + +let Uses = [RDX, RAX, RCX] in + def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index e6dc74e..0e3b571 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -979,6 +979,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); EmitByte(0xF9, CurByte, OS); break; + case X86II::MRM_D0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD0, CurByte, OS); + break; + case X86II::MRM_D1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD1, CurByte, OS); + break; } // If there is a remaining operand, it must be a trailing immediate. Emit it diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index de76856..1ee7312 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -342,9 +342,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and - // for all 64-bit targets. - if (isTargetDarwin() || isTargetLinux() || Is64Bit) + // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both + // 32 and 64 bit) and for all 64-bit targets. + if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || + isTargetSolaris() || Is64Bit) stackAlignment = 16; if (StackAlignment) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8a119b4..0a62a02 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -166,6 +166,8 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; } + bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". 
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 828d6f9..4817787 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -42,9 +42,9 @@ using namespace llvm; const char *XCoreTargetLowering:: -getTargetNodeName(unsigned Opcode) const +getTargetNodeName(unsigned Opcode) const { - switch (Opcode) + switch (Opcode) { case XCoreISD::BL : return "XCoreISD::BL"; case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper"; @@ -77,7 +77,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Division is expensive setIntDivIsCheap(false); - setShiftAmountType(MVT::i32); setStackPointerRegisterToSaveRestore(XCore::SP); setSchedulingPreference(Sched::RegPressure); @@ -95,7 +94,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Stop the combiner recombining select and set_cc setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - + // 64bit setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::SUB, MVT::i64, Custom); @@ -106,14 +105,14 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - + // Bit Manipulation setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); - + setOperationAction(ISD::TRAP, MVT::Other, Legal); - + // Jump tables. setOperationAction(ISD::BR_JT, MVT::Other, Custom); @@ -122,7 +121,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Thread Local Storage setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - + // Conversion of i64 -> double produces constantpool nodes setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -143,7 +142,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); - + // Dynamic stack setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -163,7 +162,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) SDValue XCoreTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) + switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -414,7 +413,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); DebugLoc DL = Op.getDebugLoc(); - + SDValue Base; int64_t Offset; if (!LD->isVolatile() && @@ -437,10 +436,10 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32); SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); - + SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset); SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); - + SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, LowAddr, MachinePointerInfo(), false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, @@ -453,7 +452,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue 
Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, DL); } - + if (LD->getAlignment() == 2) { SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, LD->getPointerInfo(), MVT::i16, @@ -473,16 +472,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, DL); } - + // Lower to a call to __misaligned_load(BasePtr). const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + Entry.Ty = IntPtrTy; Entry.Node = BasePtr; Args.push_back(Entry); - + std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, IntPtrTy, false, false, false, false, 0, CallingConv::C, false, @@ -515,7 +514,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const SDValue BasePtr = ST->getBasePtr(); SDValue Value = ST->getValue(); DebugLoc dl = Op.getDebugLoc(); - + if (ST->getAlignment() == 2) { SDValue Low = Value; SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, @@ -532,19 +531,19 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } - + // Lower to a call to __misaligned_store(BasePtr, Value). const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + Entry.Ty = IntPtrTy; Entry.Node = BasePtr; Args.push_back(Entry); - + Entry.Node = Value; Args.push_back(Entry); - + std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, @@ -722,7 +721,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const } DebugLoc dl = N->getDebugLoc(); - + // Extract components SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, MVT::i32)); @@ -732,7 +731,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const N->getOperand(1), DAG.getConstant(0, MVT::i32)); SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(1), DAG.getConstant(1, MVT::i32)); - + // Expand unsigned Opcode = (N->getOpcode() == ISD::ADD) ? 
XCoreISD::LADD : XCoreISD::LSUB; @@ -740,7 +739,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), LHSL, RHSL, Zero); SDValue Lo(Carry.getNode(), 1); - + SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), LHSH, RHSH, Carry); SDValue Hi(Ignored.getNode(), 1); @@ -761,8 +760,8 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Node->getOperand(1), MachinePointerInfo(V), false, false, 0); // Increment the pointer, VAList, to the next vararg - SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, - DAG.getConstant(VT.getSizeInBits(), + SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, + DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), @@ -781,20 +780,20 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const MachineFunction &MF = DAG.getMachineFunction(); XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), MachinePointerInfo(), false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! + // Depths > 0 not supported yet! if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) return SDValue(); - + MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo(); - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, RegInfo->getFrameRegister(MF), MVT::i32); } @@ -919,7 +918,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy(), true)); SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass; @@ -944,8 +943,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } - - // Arguments that can be passed on register must be kept at + + // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -954,7 +953,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, int Offset = VA.getLocMemOffset(); - MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, + MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, Chain, Arg, DAG.getConstant(Offset/4, MVT::i32))); } @@ -963,16 +962,16 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Transform all store nodes into one single node because // all store nodes are independent of each other. 
if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes chained together with token + // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag is necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -986,7 +985,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); // XCoreBranchLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... + // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -994,7 +993,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Ops.push_back(Chain); Ops.push_back(Callee); - // Add argument registers to the end of the list so that they are + // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, @@ -1098,11 +1097,11 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize(); unsigned LRSaveSize = StackSlotSize; - + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - + if (VA.isRegLoc()) { // Arguments passed in registers EVT RegVT = VA.getLocVT(); @@ -1139,12 +1138,12 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load // from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } - + if (isVarArg) { /* Argument registers */ static const unsigned ArgRegs[] = { @@ -1186,7 +1185,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, true)); } } - + return Chain; } @@ -1222,7 +1221,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_XCore); - // If this is the first return lowered for this function, add + // If this is the first return lowered for this function, add // the regs to the liveout set for the function. if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { for (unsigned i = 0; i != RVLocs.size(); ++i) @@ -1237,7 +1236,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // guarantee that all emitted copies are @@ -1265,7 +1264,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && "Unexpected instr type to insert"); - + // To "insert" a SELECT_CC instruction, we actually have to insert the diamond // control-flow pattern.
The incoming instruction knows the destination vreg // to set, the condition code register to branch on, the true/false values to @@ -1273,7 +1272,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; - + // thisMBB: // ... // TrueVal = ... @@ -1296,7 +1295,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - + BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); @@ -1304,10 +1303,10 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; - + // Update machine-CFG edges BB->addSuccessor(sinkMBB); - + // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... @@ -1316,7 +1315,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, TII.get(XCore::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -1354,7 +1353,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1377,7 +1376,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N0.getValueType(); // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set - if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { + if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1393,7 +1392,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1557,7 +1556,7 @@ static inline bool isImmUs4(int64_t val) /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
bool -XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, +XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); @@ -1568,7 +1567,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs%4 == 0; } - + switch (Size) { case 1: // reg + imm @@ -1593,7 +1592,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, // reg + reg<<2 return AM.Scale == 4 && AM.BaseOffs == 0; } - + return false; } @@ -1603,7 +1602,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::vector<unsigned> XCoreTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const + EVT VT) const { if (Constraint.size() != 1) return std::vector<unsigned>(); @@ -1611,9 +1610,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, switch (Constraint[0]) { default : break; case 'r': - return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2, - XCore::R3, XCore::R4, XCore::R5, - XCore::R6, XCore::R7, XCore::R8, + return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2, + XCore::R3, XCore::R4, XCore::R5, + XCore::R6, XCore::R7, XCore::R8, XCore::R9, XCore::R10, XCore::R11, 0); break; } diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 7e5dd2e..bb3f2cc 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -20,11 +20,11 @@ #include "XCore.h" namespace llvm { - + // Forward declarations class XCoreSubtarget; class XCoreTargetMachine; - + namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. @@ -38,16 +38,16 @@ namespace llvm { // dp relative address DPRelativeWrapper, - + // cp relative address CPRelativeWrapper, - + // Store word to stack STWSP, // Corresponds to retsp instruction RETSP, - + // Corresponds to LADD instruction LADD, @@ -74,13 +74,14 @@ namespace llvm { //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// - class XCoreTargetLowering : public TargetLowering + class XCoreTargetLowering : public TargetLowering { public: explicit XCoreTargetLowering(XCoreTargetMachine &TM); virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -91,10 +92,10 @@ namespace llvm { virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const; - /// getTargetNodeName - This method returns the name of a target specific + /// getTargetNodeName - This method returns the name of a target specific // DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const; - + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; @@ -108,7 +109,7 @@ namespace llvm { private: const XCoreTargetMachine &TM; const XCoreSubtarget &Subtarget; - + // Lower Operand helpers SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -148,12 +149,12 @@ namespace llvm { SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - + // Inline asm support std::vector<unsigned> getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - + // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 38cc734..ecdd4cb 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -727,7 +727,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>; -// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp, +// TODO setd, eet, eef, testwct, tinitpc, tinitdp, // tinitsp, tinitcp, tsetmr, sext (reg), zext (reg) let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in @@ -758,6 +758,14 @@ def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), "getr $dst, $type", [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>; +def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "getts $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>; + +def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "setpt res[$r], $val", + [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>; + def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "outct res[$r], $val", [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>; @@ -774,6 +782,11 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "out res[$r], $val", [(int_xcore_out GRRegs:$r, GRRegs:$val)]>; +let Constraints = "$src = $dst" in +def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), + "outshr res[$r], $src", + [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>; + def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "inct $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>; @@ -786,6 +799,11 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "in $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>; +let Constraints = "$src = $dst" in +def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), + "inshr $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>; + def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "chkct res[$r], $val", [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>; @@ -799,7 +817,7 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; // Two operand long -// TODO settw, setclk, setrdy, setpsc, endin, peek, +// TODO setclk, setrdy, setpsc, endin, peek, // getd, testlcl, tinitlr, getps, setps def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", @@ -813,13 +831,17 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "clz $dst, $src", [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; -def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, GRRegs:$val), +def SETC_l2r : _FL2R<(outs), (ins 
GRRegs:$r, GRRegs:$val), "setc res[$r], $val", [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; +def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "settw res[$r], $val", + [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>; + // One operand short -// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp -// setdp, setcp, setv, setev, kcall +// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp +// setdp, setcp, setev, kcall // dgetreg let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), @@ -859,20 +881,41 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), [(XCoreBranchLink GRRegs:$addr)]>; } +def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r), + "syncr res[$r]", + [(int_xcore_syncr GRRegs:$r)]>; + def FREER_1r : _F1R<(outs), (ins GRRegs:$r), "freer res[$r]", [(int_xcore_freer GRRegs:$r)]>; +let Uses=[R11] in +def SETV_1r : _F1R<(outs), (ins GRRegs:$r), + "setv res[$r], r11", + [(int_xcore_setv GRRegs:$r, R11)]>; + +def EEU_1r : _F1R<(outs), (ins GRRegs:$r), + "eeu res[$r]", + [(int_xcore_eeu GRRegs:$r)]>; + // Zero operand short -// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, +// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp +def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>; + let Defs = [R11] in def GETID_0R : _F0R<(outs), (ins), "get r11, id", [(set R11, (int_xcore_getid))]>; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, + hasSideEffects = 1 in +def WAITEU_0R : _F0R<(outs), (ins), + "waiteu", + [(brind (int_xcore_waitevent))]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// |