| author | dim <dim@FreeBSD.org> | 2011-02-27 01:32:10 +0000 |
|---|---|---|
| committer | dim <dim@FreeBSD.org> | 2011-02-27 01:32:10 +0000 |
| commit | b951d621be1d00a520871c689c1cd687b6aa3ae6 (patch) | |
| tree | 5c342f2374324ffec4626f558d9aa49f323f90b4 /contrib/llvm/lib/Target | |
| parent | 4004d6a3076e94bd23e681411c43682267a202fe (diff) | |
| parent | a0fb00f9837bd0d2e5948f16f6a6b82a7a628f51 (diff) | |
| download | FreeBSD-src-b951d621be1d00a520871c689c1cd687b6aa3ae6.zip | FreeBSD-src-b951d621be1d00a520871c689c1cd687b6aa3ae6.tar.gz |
Update llvm/clang to trunk r126547.
There are several bugfixes in this update, but the most important one is
to ensure __start_ and __stop_ symbols for linker sets and kernel module
metadata are always emitted in object files:
http://llvm.org/bugs/show_bug.cgi?id=9292
Before this fix, kernel modules compiled with clang were not processed
properly by kldxref, and any module with dependencies would therefore
fail to load. Attempting to mount a tmpfs filesystem likewise failed
with 'operation not supported by device'.
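
For background, FreeBSD's linker sets (and the kernel-module metadata built on
them) collect records into a named ELF section and walk them between the
`__start_<section>` and `__stop_<section>` symbols the linker provides for
that section; when those symbols are missing from the object files, kldxref
has no metadata to index. Below is a minimal, hedged sketch of the pattern
with illustrative names — the real macros live in FreeBSD's
`<sys/linker_set.h>` and differ in detail:

```c
/*
 * Minimal sketch of the linker-set pattern (illustrative names; not the
 * actual <sys/linker_set.h> macros).  GNU-compatible linkers synthesize
 * __start_<section> and __stop_<section> for any section whose name is a
 * valid C identifier.
 */
#include <stdio.h>

struct mod_metadata {
	const char *md_name;
};

/* Place a pointer to 'sym' into the named section. */
#define DATA_SET(set, sym)						\
	static void const *const __set_##set##_sym_##sym		\
	__attribute__((__section__(#set), __used__)) = &(sym)

static struct mod_metadata md_example = { "example_module" };
DATA_SET(set_mod_metadata, md_example);

/* Defined by the linker, provided the section and symbols are emitted. */
extern void *__start_set_mod_metadata[];
extern void *__stop_set_mod_metadata[];

int
main(void)
{
	for (void **mdp = __start_set_mod_metadata;
	    mdp < __stop_set_mod_metadata; mdp++) {
		const struct mod_metadata *md = *mdp;
		printf("found metadata record: %s\n", md->md_name);
	}
	return (0);
}
```

Built with a fixed compiler this prints the one registered record; the bug
referenced above meant the start/stop symbols were not reliably present in
clang-produced objects, which breaks exactly this kind of iteration.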
Diffstat (limited to 'contrib/llvm/lib/Target')
48 files changed, 651 insertions, 380 deletions
```diff
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 1fb8872..7e2183d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -155,10 +155,11 @@ namespace ARMII {
     //===------------------------------------------------------------------===//
     // Code domain.
     DomainShift   = 18,
-    DomainMask    = 3 << DomainShift,
+    DomainMask    = 7 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
     DomainNEON    = 2 << DomainShift,
+    DomainNEONA8  = 4 << DomainShift,
 
     //===------------------------------------------------------------------===//
     // Field shifts - such shifts are used to set field while generating
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 9f29530..26f48b3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -172,6 +172,7 @@ class ARMFastISel : public FastISel {
     unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
     unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
     unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+    unsigned ARMSelectCallOp(const GlobalValue *GV);
 
     // Call handling routines.
   private:
@@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
   return true;
 }
 
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+  // Depend our opcode for thumb on whether or not we're targeting an
+  // externally callable function. For libcalls we'll just pass a NULL GV
+  // in here.
+  bool isExternal = false;
+  if (!GV || GV->hasExternalLinkage()) isExternal = true;
+
+  // Darwin needs the r9 versions of the opcodes.
+  bool isDarwin = Subtarget->isTargetDarwin();
+  if (isThumb && isExternal) {
+    return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
+  } else if (isThumb) {
+    return isDarwin ? ARM::tBLr9 : ARM::tBL;
+  } else {
+    return isDarwin ? ARM::BLr9 : ARM::BL;
+  }
+}
+
 // A quick function that will emit a call for a named libcall in F with the
 // vector of passed arguments for the Instruction in I. We can assume that we
 // can emit a call for any libcall we can produce. This is an abridged version
@@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
-  unsigned CallOpc;
-  if(isThumb) {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+  unsigned CallOpc = ARMSelectCallOp(NULL);
+  if(isThumb)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc)))
                         .addExternalSymbol(TLI.getLibcallName(Call));
-  } else {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+  else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc))
          .addExternalSymbol(TLI.getLibcallName(Call)));
-  }
 
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
-  unsigned CallOpc;
+  unsigned CallOpc = ARMSelectCallOp(GV);
   // Explicitly adding the predicate here.
-  if(isThumb) {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+  if(isThumb)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc)))
           .addGlobalAddress(GV, 0, 0);
-  } else {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+  else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc))
           .addGlobalAddress(GV, 0, 0));
-  }
 
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index f42c6db..68c33f0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
   // Move past area 3.
-  if (DPRCSSize > 0) MBBI++;
+  if (DPRCSSize > 0) {
+    MBBI++;
+    // Since vpush register list cannot have gaps, there may be multiple vpush
+    // instructions in the prologue.
+    while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+      MBBI++;
+  }
 
   NumBytes = DPRCSOffset;
   if (NumBytes) {
@@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
       emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
 
     // Increment past our save areas.
-    if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+    if (AFI->getDPRCalleeSavedAreaSize()) {
+      MBBI++;
+      // Since vpop register list cannot have gaps, there may be multiple vpop
+      // instructions in the epilogue.
+      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+        MBBI++;
+    }
     if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
   }
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index 676b01e..e97ce50 100644
--- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
   // FIXME: Detect integer instructions properly.
   const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = MI->getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
-      return false;
-  } else
+  if (TID.mayStore())
     return false;
-  return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  return false;
 }
 
 ScheduleHazardRecognizer::HazardType
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a506cff..f0d5a7d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -126,6 +126,7 @@ public:
   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 
   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
@@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+                                            SDValue &Offset) {
+  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+  if (AM != ISD::POST_INC)
+    return false;
+  Offset = N;
+  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+      Offset = CurDAG->getRegister(0, MVT::i32);
+  }
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, SDValue &Offset,
                                        SDValue &Label) {
   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1835ec0..ab9f9e1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
     RC = ARM::GPRRegisterClass;
 
   // Transform the arguments stored in physical registers into virtual ones.
-  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
 
   SDValue ArgValue2;
@@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, 0);
   } else {
-    Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   }
 
@@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
 
       // Transform the arguments in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
     }
 
@@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
       else
         RC = ARM::GPRRegisterClass;
 
-      unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
+      unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
       SDValue Store =
         DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
   EVT SrcVT = Tmp1.getValueType();
-  bool F2IisFast = Subtarget->isCortexA9() ||
-    Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+    Tmp0.getOpcode() == ARMISD::VMOVDRR;
+  bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+  if (UseNEON) {
+    // Use VBSL to copy the sign bit.
+    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+                               DAG.getTargetConstant(EncodedVal, MVT::i32));
+    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+    if (VT == MVT::f64)
+      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+                         DAG.getConstant(32, MVT::i32));
+    else /*if (VT == MVT::f32)*/
+      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+    if (SrcVT == MVT::f32) {
+      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+      if (VT == MVT::f64)
+        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+                           DAG.getConstant(32, MVT::i32));
+    }
+    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+                                            MVT::i32);
+    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+    if (SrcVT == MVT::f32) {
+      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+                        DAG.getConstant(0, MVT::i32));
+    } else {
+      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+    }
+
+    return Res;
+  }
 
   // Bitcast operand 1 to i32.
@@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
                        &Tmp1, 1).getValue(1);
   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
 
-  // If float to int conversion isn't going to be super expensive, then simply
-  // or in the signbit.
-  if (F2IisFast) {
-    SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
-    SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
-    Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
-    if (VT == MVT::f32) {
-      Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
-                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
-      return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
-                         DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
-    }
-
-    // f64: Or the high part with signbit and then combine two parts.
-    Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
-                       &Tmp0, 1);
-    SDValue Lo = Tmp0.getValue(0);
-    SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
-    Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
-    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+  // Or in the signbit with integer operations.
+  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+  if (VT == MVT::f32) {
+    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
   }
 
-  // Remove the signbit of operand 0.
-  Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-
-  // If operand 1 signbit is one, then negate operand 0.
-  SDValue ARMcc;
-  SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
-                          ISD::SETLT, ARMcc, DAG, dl);
-  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+  // f64: Or the high part with signbit and then combine two parts.
+  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+                     &Tmp0, 1);
+  SDValue Lo = Tmp0.getValue(0);
+  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
 }
 
@@ -2897,7 +2927,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   }
 
   // Return LR, which contains the return address. Mark it an implicit live-in.
-  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
 }
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 765cba4..359ac45 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>;
 def IndexModeUpd  : IndexMode<3>;
 
 // Instruction execution domain.
-class Domain<bits<2> val> {
-  bits<2> Value = val;
+class Domain<bits<3> val> {
+  bits<3> Value = val;
 }
 def GenericDomain : Domain<0>;
 def VFPDomain     : Domain<1>; // Instructions in VFP domain only
 def NeonDomain    : Domain<2>; // Instructions in Neon domain only
 def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
 
 //===----------------------------------------------------------------------===//
 // ARM special operands.
@@ -249,7 +250,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
   let TSFlags{15-10} = Form;
   let TSFlags{16}    = isUnaryDataProc;
   let TSFlags{17}    = canXformTo16Bit;
-  let TSFlags{19-18} = D.Value;
+  let TSFlags{20-18} = D.Value;
 
   let Constraints = cstr;
   let Itinerary = itin;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index c827ce3d..6e3fe2e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -561,7 +561,9 @@ def addrmode6 : Operand<i32>,
   let EncoderMethod = "getAddrMode6AddressOpValue";
 }
 
-def am6offset : Operand<i32> {
+def am6offset : Operand<i32>,
+                ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+                               [], [SDNPWantRoot]> {
   let PrintMethod = "printAddrMode6OffsetOperand";
   let MIOperandInfo = (ops GPR);
   let EncoderMethod = "getAddrMode6OffsetOpValue";
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 1e2e550..dc3d63e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
           (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
 
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-
 // ...with address register writeback:
-class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+               PatFrag StoreOp, SDNode ExtractOp>
   : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
             (ins addrmode6:$Rn, am6offset:$Rm,
              DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
             "\\{$Vd[$lane]\\}, $Rn$Rm",
-            "$Rn.addr = $wb", []>;
+            "$Rn.addr = $wb",
+            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+                                    addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+  : VSTQLNWBPseudo<IIC_VST1lnu> {
+  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+                                        addrmode6:$addr, am6offset:$offset))];
+}
 
-def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+                             NEONvgetlaneu> {
   let Inst{7-5} = lane{2-0};
 }
-def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+                             NEONvgetlaneu> {
   let Inst{7-6} = lane{1-0};
   let Inst{4}   = Rn{5};
 }
-def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+                             extractelt> {
   let Inst{7}   = lane{0};
   let Inst{5-4} = Rn{5-4};
 }
 
-def VST1LNq8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 
 // VST2LN : Vector Store (single 2-element structure from one lane)
 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index 920c5c9..2990283 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -197,9 +197,9 @@ def VADDS  : ASbIn<0b11100, 0b11, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSUBD  : ADbI<0b11100, 0b11, 1, 0,
@@ -211,9 +211,9 @@ def VSUBS  : ASbIn<0b11100, 0b11, 1, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VDIVD  : ADbI<0b11101, 0b00, 0, 0,
@@ -235,9 +235,9 @@ def VMULS  : ASbIn<0b11100, 0b10, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VNMULD : ADbI<0b11100, 0b10, 1, 0,
@@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                   IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // Match reassociated forms only if not sign dependent rounding.
@@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
                   (outs), (ins SPR:$Sd, SPR:$Sm),
                   IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
                   [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FIXME: Verify encoding after integrated assembler is working.
@@ -286,9 +286,9 @@ def VCMPS  : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
                   (outs), (ins SPR:$Sd, SPR:$Sm),
                   IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
                   [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]
@@ -305,9 +305,9 @@ def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fabs SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 let Defs = [FPSCR] in {
@@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FIXME: Verify encoding after integrated assembler is working.
@@ -347,9 +347,9 @@ def VCMPZS  : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]
@@ -423,9 +423,9 @@ def VNEGS  : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fneg SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
@@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                 [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
   let Inst{7} = 1; // s32
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                 [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
   let Inst{7} = 0; // u32
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FP -> Int:
@@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
                                  [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
                                  [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
@@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
@@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
@@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
                                           SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
               Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
                                           SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
              Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
                                           SPR:$Sdin))]>,
                 RegConstraint<"$Sdin = $Sd">,
                 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
@@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
                   [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm),
                                            SPR:$Sdin))]>,
                 RegConstraint<"$Sdin = $Sd">,
                 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
@@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                    [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
                    RegConstraint<"$Sn = $Sd"> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // neverHasSideEffects
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 0bd740c..1465984 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
 
   // Materializable GVs (in JIT lazy compilation mode) do not require an extra
   // load from stub.
-  bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+  bool isDecl = GV->hasAvailableExternallyLinkage();
+  if (GV->isDeclaration() && !GV->isMaterializable())
+    isDecl = true;
 
   if (!isTargetDarwin()) {
     // Extra load is needed for all externally visible.
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index f9e86eb..9a27e2f 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
 }
 
 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
   // FIXME: Detect integer instructions properly.
+  const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = TID.getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (TID.mayStore() || TID.mayLoad())
-      return false;
-  } else {
+  if (TID.mayStore())
     return false;
-  }
-
-  return MI->readsRegister(Reg, TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(Reg, TRI);
+  return false;
 }
diff --git a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
index 97e54bf..965665c 100644
--- a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
+++ b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp
@@ -35,6 +35,7 @@ namespace {
   private:
     const TargetRegisterInfo *TRI;
     const ARMBaseInstrInfo *TII;
+    bool isA8;
 
     typedef DenseMap<unsigned, const MachineInstr*> RegMap;
 
@@ -43,6 +44,11 @@ namespace {
   char NEONMoveFixPass::ID = 0;
 }
 
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+  return (Domain & ARMII::DomainNEON) ||
+         (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
 bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
   RegMap Defs;
   bool Modified = false;
@@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
       Domain = ARMII::DomainNEON;
     }
 
-    if (Domain & ARMII::DomainNEON) {
+    if (inNEONDomain(Domain, isA8)) {
       // Convert VMOVD to VMOVDneon
       unsigned DestReg = MI->getOperand(0).getReg();
 
@@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
 
   TRI = TM.getRegisterInfo();
   TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+  isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
 
   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2f67257..9b1073b 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
 bool
 Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) const {
+  while (MBBI->isDebugValue()) {
+    ++MBBI;
+    if (MBBI == MBB.end())
+      return false;
+  }
+
   unsigned PredReg = 0;
   return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
 }
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
index 9137d65..c4f43ab 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -48,7 +48,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
   // Set up the TargetLowering object.
   //I am having problems with shr n i8 1
-  setShiftAmountType(MVT::i64);
   setBooleanContents(ZeroOrOneBooleanContent);
 
   addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
index b429e9f..cb98f92 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
@@ -31,25 +31,25 @@ namespace llvm {
 
       /// GPRelHi/GPRelLo - These represent the high and low 16-bit
       /// parts of a global address respectively.
-      GPRelHi, GPRelLo, 
+      GPRelHi, GPRelLo,
 
       /// RetLit - Literal Relocation of a Global
       RelLit,
 
       /// GlobalRetAddr - used to restore the return address
       GlobalRetAddr,
-      
+
       /// CALL - Normal call.
       CALL,
 
       /// DIVCALL - used for special library calls for div and rem
      DivCall,
-      
+
       /// return flag operand
       RET_FLAG,
 
       /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
-      /// corresponds to the COND_BRANCH pseudo instruction.  
+      /// corresponds to the COND_BRANCH pseudo instruction.
       /// *PRC is the input register to compare to zero,
       /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
       /// DESTBB is the destination block to branch to, and INFLAG is
@@ -62,7 +62,9 @@ namespace llvm {
   class AlphaTargetLowering : public TargetLowering {
   public:
     explicit AlphaTargetLowering(TargetMachine &TM);
-    
+
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
     /// getSetCCResultType - Get the SETCC result ValueType
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
 
@@ -92,7 +94,7 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;
 
-    std::vector<unsigned> 
+    std::vector<unsigned>
       getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                         EVT VT) const;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
index dd27d0a..7c80eec 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -41,7 +41,6 @@ using namespace llvm;
 
 BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
-  setShiftAmountType(MVT::i16);
   setBooleanContents(ZeroOrOneBooleanContent);
   setStackPointerRegisterToSaveRestore(BF::SP);
   setIntDivIsCheap(false);
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
index 15a745f..102c830 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -32,6 +32,7 @@ namespace llvm {
   class BlackfinTargetLowering : public TargetLowering {
   public:
     BlackfinTargetLowering(TargetMachine &TM);
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
    virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
     virtual void ReplaceNodeResults(SDNode *N,
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
index e6511d0..743a4d7 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -435,7 +435,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 
   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 
-  setShiftAmountType(MVT::i32);
   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 
   setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -1219,7 +1218,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
       FuncInfo->setVarArgsFrameIndex(
         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
-      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl);
+      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                    false, false, 0);
@@ -2190,7 +2189,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
 {
   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
   DebugLoc dl = Op.getDebugLoc();
-  EVT ShiftVT = TLI.getShiftAmountTy();
+  EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
 
   assert(Op.getValueType() == MVT::i8);
   switch (Opc) {
@@ -3112,7 +3111,7 @@ SPUTargetLowering::getSingleConstraintMatchWeight(
   switch (*constraint) {
   default:
     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
-    break;
+    break;
   //FIXME: Seems like the supported constraint letters were just copied
   // from PPC, as the following doesn't correspond to the GCC docs.
   // I'm leaving it so until someone adds the corresponding lowering support.
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
index 95d44af..dd48d7b 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
@@ -109,6 +109,8 @@ namespace llvm {
     /// getSetCCResultType - Return the ValueType for ISD::SETCC
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
 
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
     //! Custom lowering hooks
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
@@ -179,9 +181,9 @@ namespace llvm {
 
     virtual bool isLegalICmpImmediate(int64_t Imm) const;
 
-    virtual bool isLegalAddressingMode(const AddrMode &AM, 
+    virtual bool isLegalAddressingMode(const AddrMode &AM,
                                        const Type *Ty) const;
-    
+
     /// After allocating this many registers, the allocator should feel
     /// register pressure. The value is a somewhat random guess, based on the
     /// number of non callee saved registers in the C calling convention.
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 2f40bfc..f39826b 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -907,7 +907,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
 
       // Transform the arguments stored on
       // physical registers into virtual ones
-      unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl);
+      unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it has been passed promoted
@@ -973,7 +973,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
 
     for (; Start <= End; ++Start, ++StackLoc) {
       unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
-      unsigned LiveReg = MF.addLiveIn(Reg, RC, dl);
+      unsigned LiveReg = MF.addLiveIn(Reg, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
 
       int FI = MFI->CreateFixedObject(4, 0, true);
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 30ef4f5..a95d59c 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -77,10 +77,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
   // Division is expensive
   setIntDivIsCheap(false);
 
-  // Even if we have only 1 bit shift here, we can perform
-  // shifts of the whole bitwidth 1 bit per step.
-  setShiftAmountType(MVT::i8);
-
   setStackPointerRegisterToSaveRestore(MSP430::SPW);
   setBooleanContents(ZeroOrOneBooleanContent);
   setSchedulingPreference(Sched::Latency);
@@ -330,7 +326,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
       // Arguments passed in registers
       EVT RegVT = VA.getLocVT();
       switch (RegVT.getSimpleVT().SimpleTy) {
-      default:
+      default: {
 #ifndef NDEBUG
         errs() << "LowerFormalArguments Unhandled argument type: "
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index 673c543..19c9eac 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -73,6 +73,8 @@ namespace llvm {
   public:
     explicit MSP430TargetLowering(MSP430TargetMachine &TM);
 
+    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8f623b8..70d00e4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -362,7 +362,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
   }
 
-  setShiftAmountType(MVT::i32);
   setBooleanContents(ZeroOrOneBooleanContent);
 
   if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
@@ -1597,7 +1596,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       }
 
      // Transform the arguments stored in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
 
       InVals.push_back(ArgValue);
@@ -1689,7 +1688,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       // Get an existing live-in vreg, or add a new one.
       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
       if (!VReg)
-        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl);
+        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1708,7 +1707,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       // Get an existing live-in vreg, or add a new one.
       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
       if (!VReg)
-        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl);
+        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1872,7 +1871,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         InVals.push_back(FIN);
         if (ObjSize==1 || ObjSize==2) {
           if (GPR_idx != Num_GPR_Regs) {
-            unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+            unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
             SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
             SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                               MachinePointerInfo(),
@@ -1891,7 +1890,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         // to memory.  ArgVal will be address of the beginning of
         // the object.
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -1914,7 +1913,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
     case MVT::i32:
       if (!isPPC64) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
           ++GPR_idx;
         } else {
@@ -1928,7 +1927,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       // FALLTHROUGH
     case MVT::i64:  // PPC64
       if (GPR_idx != Num_GPR_Regs) {
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
 
         if (ObjectVT == MVT::i32) {
@@ -1966,9 +1965,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         unsigned VReg;
 
         if (ObjectVT == MVT::f32)
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
         else
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
 
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++FPR_idx;
@@ -1986,7 +1985,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
       if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl);
+        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         if (isVarArg) {
           while ((ArgOffset % 16) != 0) {
@@ -2064,9 +2063,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       unsigned VReg;
 
       if (isPPC64)
-        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
       else
-        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 80cab75..33daae9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -29,36 +29,36 @@ namespace llvm {
       /// FSEL - Traditional three-operand fsel node.
       ///
       FSEL,
-      
+
      /// FCFID - The FCFID instruction, taking an f64 operand and producing
       /// and f64 value containing the FP representation of the integer that
       /// was temporarily in the f64 operand.
       FCFID,
-      
-      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 
+
+      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
       /// operand, producing an f64 value containing the integer representation
       /// of that FP value.
       FCTIDZ, FCTIWZ,
-      
+
      /// STFIWX - The STFIWX instruction.  The first operand is an input token
       /// chain, then an f64 value to store, then an address to store it to.
       STFIWX,
-      
+
       // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
       // three v4f32 operands and producing a v4f32 result.
       VMADDFP, VNMSUBFP,
-      
+
       /// VPERM - The PPC VPERM Instruction.
       ///
       VPERM,
-      
+
      /// Hi/Lo - These represent the high and low 16-bit parts of a global
       /// address respectively.  These nodes have two operands, the first of
       /// which must be a TargetGlobalAddress, and the second of which must be a
       /// Constant.  Selected naively, these turn into 'lis G+C' and 'li G+C',
       /// though these are usually folded into other nodes.
       Hi, Lo,
-      
+
       TOC_ENTRY,
 
       /// The following three target-specific nodes are used for calls through
@@ -80,37 +80,37 @@ namespace llvm {
      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
       /// compute an allocation on the stack.
       DYNALLOC,
-      
+
      /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
       /// at function entry, used for PIC code.
       GlobalBaseReg,
-      
+
       /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
      /// shift amounts.  These nodes are generated by the multi-precision shift
       /// code.
       SRL, SRA, SHL,
-      
+
       /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
       /// registers.
       EXTSW_32,
 
       /// CALL - A direct function call.
       CALL_Darwin, CALL_SVR4,
-      
+
       /// NOP - Special NOP which follows 64-bit SVR4 calls.
       NOP,
 
      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
       /// MTCTR instruction.
       MTCTR,
-      
+
       /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
       /// BCTRL instruction.
       BCTRL_Darwin, BCTRL_SVR4,
-      
+
       /// Return with a flag operand, matched by 'blr'
       RET_FLAG,
-      
+
       /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
       /// instructions.  This copies the bits corresponding to the specified
       /// CRREG into the resultant GPR.  Bits corresponding to other CR regs
@@ -122,20 +122,20 @@ namespace llvm {
      /// encoding for the OPC field to identify the compare.  For example, 838
       /// is VCMPGTSH.
       VCMP,
-      
+
       /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
-      /// altivec VCMP*o instructions.  For lack of better number, we use the 
+      /// altivec VCMP*o instructions.  For lack of better number, we use the
      /// opcode number encoding for the OPC field to identify the compare.  For
       /// example, 838 is VCMPGTSH.
       VCMPo,
-      
+
       /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
       /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
      /// condition register to branch on, OPC is the branch opcode to use (e.g.
      /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
       /// an optional input flag argument.
       COND_BRANCH,
-      
+
       // The following 5 instructions are used only as part of the
       // long double-to-int conversion sequence.
@@ -150,7 +150,7 @@ namespace llvm {
       MTFSB1,
 
      /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
-      /// rounding towards zero.  It has flags added so it won't move past the 
+      /// rounding towards zero.  It has flags added so it won't move past the
       /// FPSCR-setting instructions.
       FADDRTZ,
@@ -174,14 +174,14 @@ namespace llvm {
 
      /// STD_32 - This is the STD instruction for use with "32-bit" registers.
       STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
-      
-      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a 
+
+      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
      /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
       /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
       /// i32.
-      STBRX, 
-      
-      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a 
+      STBRX,
+
+      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
      /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
       /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
       /// or i32.
@@ -194,7 +194,7 @@ namespace llvm {
   /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
   /// VPKUHUM instruction.
   bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
-  
+
   /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
   /// VPKUWUM instruction.
   bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
@@ -208,16 +208,16 @@ namespace llvm {
   /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
   bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                           bool isUnary);
-  
+
  /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
   /// amount, otherwise return -1.
   int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
-  
+
  /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
   /// specifies a splat of a single element that is suitable for input to
   /// VSPLTB/VSPLTH/VSPLTW.
   bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
-  
+
   /// isAllNegativeZeroVector - Returns true if all elements of build_vector
   /// are -0.0.
   bool isAllNegativeZeroVector(SDNode *N);
@@ -225,24 +225,26 @@ namespace llvm {
  /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
   /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
   unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
-  
+
   /// get_VSPLTI_elt - If this is a build_vector of constants which can be
   /// formed by using a vspltis[bhw] instruction of the specified element
   /// size, return the constant being splatted.  The ByteSize field indicates
   /// the number of bytes of each element [124] -> [bhw].
   SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
 }
-  
+
 class PPCTargetLowering : public TargetLowering {
   const PPCSubtarget &PPCSubTarget;
 public:
   explicit PPCTargetLowering(PPCTargetMachine &TM);
-  
+
  /// getTargetNodeName() - This method returns the name of a target specific
   /// DAG node.
   virtual const char *getTargetNodeName(unsigned Opcode) const;
 
+  virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
   /// getSetCCResultType - Return the ISD::SETCC ValueType
   virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
@@ -253,19 +255,19 @@ namespace llvm {
                                          SDValue &Offset,
                                          ISD::MemIndexedMode &AM,
                                          SelectionDAG &DAG) const;
-    
+
    /// SelectAddressRegReg - Given the specified addressed, check to see if it
     /// can be represented as an indexed [r+r] operation.  Returns false if it
     /// can be more efficiently represented with [r+imm].
     bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                              SelectionDAG &DAG) const;
-    
+
    /// SelectAddressRegImm - Returns true if the address N can be represented
    /// by a base register plus a signed 16-bit displacement [r+imm], and if it
     /// is not better represented as reg+reg.
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                              SelectionDAG &DAG) const;
-    
+
    /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
     bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
@@ -277,7 +279,7 @@ namespace llvm {
 
     bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
                                   SelectionDAG &DAG) const;
-    
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -289,10 +291,10 @@ namespace llvm {
                                 SelectionDAG &DAG) const;
 
    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-    
+
     virtual void computeMaskedBitsForTargetNode(const SDValue Op,
                                                 const APInt &Mask,
-                                                APInt &KnownZero, 
+                                                APInt &KnownZero,
                                                 APInt &KnownOne,
                                                 const SelectionDAG &DAG,
                                                 unsigned Depth = 0) const;
@@ -300,13 +302,13 @@ namespace llvm {
     virtual MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
-    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, 
+    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
                                        MachineBasicBlock *MBB, bool is64Bit,
                                         unsigned BinOpcode) const;
-    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, 
-                                                MachineBasicBlock *MBB, 
+    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+                                                MachineBasicBlock *MBB,
                                             bool is8bit, unsigned Opcode) const;
-    
+
    ConstraintType getConstraintType(const std::string &Constraint) const;
 
    /// Examine constraint string and operand type and determine a weight value.
@@ -314,7 +316,7 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;
 
-    std::pair<unsigned, const TargetRegisterClass*> 
+    std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
@@ -329,11 +331,11 @@ namespace llvm {
                                       char ConstraintLetter,
                                       std::vector<SDValue> &Ops,
                                       SelectionDAG &DAG) const;
-    
+
    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
-    
+
    /// isLegalAddressImmediate - Return true if the integer value can be used
     /// as the offset of the target addressing mode for load / store of the
     /// given type.
@@ -344,7 +346,7 @@ namespace llvm {
     virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
 
    virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-    
+
    /// getOptimalMemOpType - Returns the target specific optimal type for load
     /// and store operations as a result of memset, memcpy, and memmove
     /// lowering. If DstAlign is zero that means it's safe to destination
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index ee29275..4b12852 100644
--- a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -79,6 +79,7 @@ namespace {
 
     MachineBasicBlock::iterator
     findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+    bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize);
 
   };
   char Filler::ID = 0;
@@ -91,6 +92,7 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
   return new Filler(tm);
 }
 
+
 /// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
 /// We assume there is only one delay slot per delayed instruction.
 ///
@@ -112,6 +114,13 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
         BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
       else
         MBB.splice(++J, &MBB, D);
+      unsigned structSize = 0;
+      if (needsUnimp(I, structSize)) {
+        MachineBasicBlock::iterator J = I;
+        ++J; //skip the delay filler.
+        BuildMI(MBB, ++J, I->getDebugLoc(),
+                TII->get(SP::UNIMP)).addImm(structSize);
+      }
     }
   return Changed;
 }
@@ -287,6 +296,28 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
 {
   if (candidate == MBB.begin())
     return false;
+  if (candidate->getOpcode() == SP::UNIMP)
+    return true;
   const TargetInstrDesc &prevdesc = (--candidate)->getDesc();
   return prevdesc.hasDelaySlot();
 }
+
+bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
+{
+  if (!I->getDesc().isCall())
+    return false;
+
+  unsigned structSizeOpNum = 0;
+  switch (I->getOpcode()) {
+  default: llvm_unreachable("Unknown call opcode.");
+  case SP::CALL: structSizeOpNum = 1; break;
+  case SP::JMPLrr:
+  case SP::JMPLri: structSizeOpNum = 2; break;
+  }
+
+  const MachineOperand &MO = I->getOperand(structSizeOpNum);
+  if (!MO.isImm())
+    return false;
+  StructSize = MO.getImm();
+  return true;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 196b87d..70574c3 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -16,7 +16,9 @@
 #include "SparcISelLowering.h"
 #include "SparcTargetMachine.h"
 #include "SparcMachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/Module.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -116,6 +118,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
    // Guarantee that all emitted copies are stuck together with flags.
     Flag = Chain.getValue(1);
   }
+
+  unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
   // If the function returns a struct, copy the SRetReturnReg to I0
   if (MF.getFunction()->hasStructRetAttr()) {
    SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
@@ -127,11 +131,16 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     Flag = Chain.getValue(1);
     if (MF.getRegInfo().liveout_empty())
       MF.getRegInfo().addLiveOut(SP::I0);
+    RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
   }
 
+  SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+
   if (Flag.getNode())
-    return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
-  return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain);
+    return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+                       RetAddrOffsetNode, Flag);
+  return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+                     RetAddrOffsetNode);
 }
 
 /// LowerFormalArguments - V8 uses a very simple ABI, where all values are
@@ -194,7 +203,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
                                false, false, 0);
       } else {
         unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
-                                      &SP::IntRegsRegClass, dl);
+                                      &SP::IntRegsRegClass);
         LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
       }
       SDValue WholeValue =
@@ -393,6 +402,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   SmallVector<SDValue, 8> MemOpChains;
 
   const unsigned StackOffset = 92;
+  bool hasStructRetAttr = false;
 
   // Walk the register/memloc assignments, inserting copies/loads.
```
for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size(); i != e; @@ -433,6 +443,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); + hasStructRetAttr = true; continue; } @@ -546,6 +557,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0; + // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. @@ -559,6 +572,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); + if (hasStructRetAttr) + Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32)); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { unsigned Reg = RegsToPass[i].first; if (Reg >= SP::I0 && Reg <= SP::I7) @@ -600,7 +615,29 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, return Chain; } +unsigned +SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const +{ + const Function *CalleeFn = 0; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + CalleeFn = dyn_cast<Function>(G->getGlobal()); + } else if (ExternalSymbolSDNode *E = + dyn_cast<ExternalSymbolSDNode>(Callee)) { + const Function *Fn = DAG.getMachineFunction().getFunction(); + const Module *M = Fn->getParent(); + CalleeFn = M->getFunction(E->getSymbol()); + } + + if (!CalleeFn) + return 0; + assert(CalleeFn->hasStructRetAttr() && + "Callee does not have the StructRet attribute."); + + const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); + const Type *ElementTy = Ty->getElementType(); + return getTargetData()->getTypeAllocSize(ElementTy); +} //===----------------------------------------------------------------------===// // TargetLowering Implementation diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h index 849e401..7d02df8 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -101,6 +101,8 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + + unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td index 1072323..cf5c48f 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -124,7 +124,8 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def retflag : SDNode<"SPISD::RET_FLAG", SDTNone, +def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, [SDNPHasChain, SDNPOptInGlue]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, @@ -132,7 +133,7 @@ def flushw : SDNode<"SPISD::FLUSHW", SDTNone, def getPCX : Operand<i32> { let PrintMethod = "printGetPCX"; -} +} //===----------------------------------------------------------------------===// // SPARC Flag Conditions @@ -232,6 +233,9 @@ let hasSideEffects = 1, mayStore = 1 in { [(flushw)]>; } +def UNIMP : F2_1<0b000, (outs), 
(ins i32imm:$val), + "unimp $val", []>; + // FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the // fpmover pass. let Predicates = [HasNoV9] in { // Only emit these in V8 mode. @@ -292,11 +296,13 @@ let usesCustomInserter = 1, Uses = [FCC] in { // Section A.3 - Synthetic Instructions, p. 85 // special cases of JMPL: let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { - let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in - def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>; + let rd = O7.Num, rs1 = G0.Num in + def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %o7+$val", [(retflag simm13:$val)]>; - let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in - def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>; + let rd = I7.Num, rs1 = G0.Num in + def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %i7+$val", []>; } // Section B.1 - Load Integer Instructions, p. 90 diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index d694f2e..90939c3 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -59,9 +59,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : // Compute derived properties from the register classes computeRegisterProperties(); - // Set shifts properties - setShiftAmountType(MVT::i64); - // Provide all sorts of operation actions setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 51d2df3..3019242 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -57,6 +57,8 @@ namespace llvm { public: explicit SystemZTargetLowering(SystemZTargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } + /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 1cac07a..8fe549b 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -775,6 +775,19 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, delete &Op; } } + // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". + if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && + Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + if (Op.isMem() && Op.Mem.SegReg == 0 && + isa<MCConstantExpr>(Op.Mem.Disp) && + cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { + SMLoc Loc = Op.getEndLoc(); + Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); + delete &Op; + } + } // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to // "shift <op>". 
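
The Sparc hunks above (DelaySlotFiller::needsUnimp, the RetAddrOffset logic in LowerReturn, and the retooled RET/RETL patterns) together implement the SPARC V8 aggregate-return handshake: the caller plants an "unimp <size>" word after the call's delay slot, and a struct-returning callee skips it by returning to %o7+12 rather than the usual %o7+8 (the diff's own comments: "Call Inst + Delay Slot" versus "CallInst + Delay Slot + Unimp"). A minimal standalone C++ sketch of that offset arithmetic; the names are illustrative and not part of the LLVM API:

#include <cstdio>

// SPARC V8 ABI: a call instruction occupies 4 bytes and its delay slot
// another 4, so a normal return lands at %o7+8 (or %i7+8 after a save).
// When the caller expects an aggregate result it also emits a 4-byte
// "unimp <size>" word after the delay slot, which the callee must hop over.
unsigned sparcRetAddrOffset(bool returnsStruct) {
  const unsigned CallInst  = 4;  // the call itself
  const unsigned DelaySlot = 4;  // its architectural delay slot
  const unsigned UnimpWord = 4;  // caller-planted unimp carrying the size
  return CallInst + DelaySlot + (returnsStruct ? UnimpWord : 0);
}

int main() {
  std::printf("scalar return: jmp %%o7+%u\n", sparcRetAddrOffset(false)); // +8
  std::printf("struct return: jmp %%o7+%u\n", sparcRetAddrOffset(true));  // +12
  return 0;
}

The unimp word doubles as the ABI's guard against mismatched expectations: a callee that does not actually return a struct comes back to %o7+8, executes the unimp, and traps. That is also why isDelayFiller above now treats SP::UNIMP as part of the call sequence.
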
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 691e2d7..f777756 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -168,16 +168,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, switch (insn.displacementSize) { default: break; - case 8: + case 1: type = TYPE_MOFFS8; break; - case 16: + case 2: type = TYPE_MOFFS16; break; - case 32: + case 4: type = TYPE_MOFFS32; break; - case 64: + case 8: type = TYPE_MOFFS64; break; } diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 4f4fbcd..d0dc8b5 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -399,7 +399,7 @@ struct InternalInstruction { /* The segment override type */ SegmentOverride segmentOverride; - /* Sizes of various critical pieces of data */ + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; uint8_t displacementSize; diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index 9d42ac2..6fa9284 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -597,9 +597,13 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { (AM.Base.Reg != 0 || AM.IndexReg != 0)) return false; - // Can't handle TLS or DLLImport. + // Can't handle DLLImport. + if (GV->hasDLLImportLinkage()) + return false; + + // Can't handle TLS. if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) + if (GVar->isThreadLocal()) return false; // Okay, we've committed to selecting this global. Set up the basic address. diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 27024b4..2f49dbc 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45,7 +45,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VectorExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -56,10 +55,6 @@ using namespace dwarf; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt<bool> -Disable256Bit("disable-256bit", cl::Hidden, - cl::desc("Disable use of 256-bit vectors")); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -225,7 +220,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird, it always uses i8 for shift amounts and setcc results. 
- setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); @@ -1713,7 +1707,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, else llvm_unreachable("Unknown argument type!"); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 @@ -1845,7 +1839,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass, dl); + X86::GR64RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1861,7 +1855,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<SDValue, 11> SaveXMMOps; SaveXMMOps.push_back(Chain); - unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl); + unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); @@ -1872,7 +1866,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs], - X86::VR128RegisterClass, dl); + X86::VR128RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); SaveXMMOps.push_back(Val); } @@ -2693,6 +2687,10 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -2760,6 +2758,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -4178,7 +4180,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, - DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); + DAG.getConstant(NumBits, + TLI.getShiftAmountTy(SrcOp.getValueType())))); } SDValue @@ -4327,16 +4330,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and // use shuffles to put them in place. - if (VT.getSizeInBits() > 256 && - Subtarget->hasAVX() && - !Disable256Bit && + if (VT.getSizeInBits() > 256 && + Subtarget->hasAVX() && !ISD::isBuildVectorAllZeros(Op.getNode())) { SmallVector<SDValue, 8> V; V.resize(NumElems); for (unsigned i = 0; i < NumElems; ++i) { V[i] = Op.getOperand(i); } - + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); // Build the lower subvector. 
@@ -5044,7 +5046,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DAG.getIntPtrConstant(Elt1 / 2)); if ((Elt1 & 1) == 0) InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt.getValueType()))); else if (Elt0 >= 0) InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt, DAG.getConstant(0xFF00, MVT::i16)); @@ -5058,7 +5061,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, Elt0Src, DAG.getIntPtrConstant(Elt0 / 2)); if ((Elt0 & 1) != 0) InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt0.getValueType()))); else if (Elt1 >= 0) InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0, DAG.getConstant(0x00FF, MVT::i16)); @@ -5475,7 +5479,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; - + if (CanFoldLoad) { if (HasSSE2 && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); @@ -6088,7 +6092,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue ScaledN2 = N2; if (Upper) ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, - DAG.getConstant(NumElems / + DAG.getConstant(NumElems / (VT.getSizeInBits() / 128), N2.getValueType())); Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, @@ -9327,6 +9331,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS"; + case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD"; + case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; + case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; @@ -11984,6 +11992,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 419da37..6ec4a7d 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -159,16 +159,16 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - + /// PANDN - and with not'd value. PANDN, - + /// PSIGNB/W/D - Copy integer sign. - PSIGNB, PSIGNW, PSIGND, - + PSIGNB, PSIGNW, PSIGND, + /// PBLENDVB - Variable blend PBLENDVB, - + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, @@ -212,7 +212,7 @@ namespace llvm { // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, - + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. 
@@ -248,6 +248,10 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, + VUNPCKLPS, + VUNPCKLPD, + VUNPCKLPSY, + VUNPCKLPDY, UNPCKHPS, UNPCKHPD, PUNPCKLBW, @@ -463,6 +467,8 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td index 344c14c..0660072 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td @@ -41,6 +41,8 @@ def MRM_F8 : Format<41>; def MRM_F9 : Format<42>; def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; +def MRM_D0 : Format<45>; +def MRM_D1 : Format<46>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index ceb1b65..76a9b12 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -369,8 +369,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, @@ -568,6 +566,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL16rr, X86::IMUL16rm, 0 }, { X86::IMUL32rr, X86::IMUL32rm, 0 }, { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, 16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, { X86::MAXPSrr, X86::MAXPSrm, 16 }, diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h index 1d44207..fcb5a25 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h @@ -311,6 +311,8 @@ namespace X86II { MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, + MRM_D0 = 45, + MRM_D1 = 46, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -577,6 +579,8 @@ namespace X86II { case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: + case X86II::MRM_D1: return -1; } } diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index 87dc4be..f832a7c 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -1296,6 +1296,9 @@ def : MnemonicAlias<"lret", "lretl">; def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; + def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td index 1a58ba0..6a24d14 100644 --- 
a/contrib/llvm/lib/Target/X86/X86InstrSystem.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td @@ -388,3 +388,8 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; +let Defs = [RDX, RAX], Uses = [RCX] in + def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; + +let Uses = [RDX, RAX, RCX] in + def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp index e6dc74e..0e3b571 100644 --- a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp @@ -979,6 +979,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); EmitByte(0xF9, CurByte, OS); break; + case X86II::MRM_D0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD0, CurByte, OS); + break; + case X86II::MRM_D1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD1, CurByte, OS); + break; } // If there is a remaining operand, it must be a trailing immediate. Emit it diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index de76856..1ee7312 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -342,9 +342,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and - // for all 64-bit targets. - if (isTargetDarwin() || isTargetLinux() || Is64Bit) + // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both + // 32 and 64 bit) and for all 64-bit targets. + if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || + isTargetSolaris() || Is64Bit) stackAlignment = 16; if (StackAlignment) diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index 8a119b4..0a62a02 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -166,6 +166,8 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; } + bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". 
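
The X86InstrSystem.td and X86MCCodeEmitter.cpp hunks above add xgetbv/xsetbv via the two new fixed-ModRM forms MRM_D0/MRM_D1. Both instructions live in the two-byte (0x0F-escaped) opcode map via the TB trait, share base opcode 0x01, and differ only in a constant ModRM byte, which is all the new EncodeInstruction cases emit. A small self-contained sketch of the resulting byte sequences; the helper name is illustrative, not LLVM's:

#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint8_t> encodeXBV(bool isXSETBV) {
  std::vector<uint8_t> Out;
  Out.push_back(0x0F);                   // two-byte opcode escape (TB)
  Out.push_back(0x01);                   // shared base opcode
  Out.push_back(isXSETBV ? 0xD1 : 0xD0); // fixed ModRM selects the variant
  return Out;
}

int main() {
  for (bool Set : {false, true}) {
    for (uint8_t B : encodeXBV(Set)) std::printf("%02x ", B);
    std::printf("= %s\n", Set ? "xsetbv" : "xgetbv"); // 0f 01 d0 / 0f 01 d1
  }
  return 0;
}

The register effects are carried separately by the .td defs: XGETBV defines RDX and RAX and reads RCX, while XSETBV reads all three, so no operands ever need encoding.
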
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 828d6f9..4817787 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -42,9 +42,9 @@ using namespace llvm; const char *XCoreTargetLowering:: -getTargetNodeName(unsigned Opcode) const +getTargetNodeName(unsigned Opcode) const { - switch (Opcode) + switch (Opcode) { case XCoreISD::BL : return "XCoreISD::BL"; case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper"; @@ -77,7 +77,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Division is expensive setIntDivIsCheap(false); - setShiftAmountType(MVT::i32); setStackPointerRegisterToSaveRestore(XCore::SP); setSchedulingPreference(Sched::RegPressure); @@ -95,7 +94,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Stop the combiner recombining select and set_cc setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - + // 64bit setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::SUB, MVT::i64, Custom); @@ -106,14 +105,14 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - + // Bit Manipulation setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); - + setOperationAction(ISD::TRAP, MVT::Other, Legal); - + // Jump tables. setOperationAction(ISD::BR_JT, MVT::Other, Custom); @@ -122,7 +121,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Thread Local Storage setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - + // Conversion of i64 -> double produces constantpool nodes setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -143,7 +142,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); - + // Dynamic stack setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -163,7 +162,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) SDValue XCoreTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) + switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -414,7 +413,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); DebugLoc DL = Op.getDebugLoc(); - + SDValue Base; int64_t Offset; if (!LD->isVolatile() && @@ -437,10 +436,10 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32); SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); - + SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset); SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); - + SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, LowAddr, MachinePointerInfo(), false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, @@ -453,7 +452,7 @@ 
LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, DL); } - + if (LD->getAlignment() == 2) { SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, LD->getPointerInfo(), MVT::i16, @@ -473,16 +472,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, DL); } - + // Lower to a call to __misaligned_load(BasePtr). const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + Entry.Ty = IntPtrTy; Entry.Node = BasePtr; Args.push_back(Entry); - + std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, IntPtrTy, false, false, false, false, 0, CallingConv::C, false, @@ -515,7 +514,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const SDValue BasePtr = ST->getBasePtr(); SDValue Value = ST->getValue(); DebugLoc dl = Op.getDebugLoc(); - + if (ST->getAlignment() == 2) { SDValue Low = Value; SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, @@ -532,19 +531,19 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } - + // Lower to a call to __misaligned_store(BasePtr, Value). const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + Entry.Ty = IntPtrTy; Entry.Node = BasePtr; Args.push_back(Entry); - + Entry.Node = Value; Args.push_back(Entry); - + std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, @@ -722,7 +721,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const } DebugLoc dl = N->getDebugLoc(); - + // Extract components SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, MVT::i32)); @@ -732,7 +731,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const N->getOperand(1), DAG.getConstant(0, MVT::i32)); SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(1), DAG.getConstant(1, MVT::i32)); - + // Expand unsigned Opcode = (N->getOpcode() == ISD::ADD) ? 
XCoreISD::LADD : XCoreISD::LSUB; @@ -740,7 +739,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), LHSL, RHSL, Zero); SDValue Lo(Carry.getNode(), 1); - + SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), LHSH, RHSH, Carry); SDValue Hi(Ignored.getNode(), 1); @@ -761,8 +760,8 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Node->getOperand(1), MachinePointerInfo(V), false, false, 0); // Increment the pointer, VAList, to the next vararg - SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, - DAG.getConstant(VT.getSizeInBits(), + SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, + DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), @@ -781,20 +780,20 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const MachineFunction &MF = DAG.getMachineFunction(); XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), MachinePointerInfo(), false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! + // Depths > 0 not supported yet! if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) return SDValue(); - + MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo(); - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, RegInfo->getFrameRegister(MF), MVT::i32); } @@ -919,7 +918,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy(), true)); SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass; @@ -944,8 +943,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } - - // Arguments that can be passed on register must be kept at + + // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -954,7 +953,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, int Offset = VA.getLocMemOffset(); - MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, + MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, Chain, Arg, DAG.getConstant(Offset/4, MVT::i32))); } @@ -963,16 +962,16 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Transform all store nodes into one single node because // all store nodes are independent of each other. 
if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes chained together with token + // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag is necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -986,7 +985,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); // XCoreBranchLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... + // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -994,7 +993,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Ops.push_back(Chain); Ops.push_back(Callee); - // Add argument registers to the end of the list so that they are + // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, @@ -1098,11 +1097,11 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize(); unsigned LRSaveSize = StackSlotSize; - + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - + if (VA.isRegLoc()) { // Arguments passed in registers EVT RegVT = VA.getLocVT(); @@ -1139,12 +1138,12 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } - + if (isVarArg) { /* Argument registers */ static const unsigned ArgRegs[] = { @@ -1186,7 +1185,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, true)); } } - + return Chain; } @@ -1222,7 +1221,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_XCore); - // If this is the first return lowered for this function, add + // If this is the first return lowered for this function, add // the regs to the liveout set for the function. if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { for (unsigned i = 0; i != RVLocs.size(); ++i) @@ -1237,7 +1236,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // guarantee that all emitted copies are @@ -1265,7 +1264,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && "Unexpected instr type to insert"); - + // To "insert" a SELECT_CC instruction, we actually have to insert the diamond // control-flow pattern.
The incoming instruction knows the destination vreg // to set, the condition code register to branch on, the true/false values to @@ -1273,7 +1272,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; - + // thisMBB: // ... // TrueVal = ... @@ -1296,7 +1295,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - + BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); @@ -1304,10 +1303,10 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; - + // Update machine-CFG edges BB->addSuccessor(sinkMBB); - + // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... @@ -1316,7 +1315,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, TII.get(XCore::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -1354,7 +1353,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1377,7 +1376,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N0.getValueType(); // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set - if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { + if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1393,7 +1392,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1557,7 +1556,7 @@ static inline bool isImmUs4(int64_t val) /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
bool -XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, +XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); @@ -1568,7 +1567,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs%4 == 0; } - + switch (Size) { case 1: // reg + imm @@ -1593,7 +1592,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, // reg + reg<<2 return AM.Scale == 4 && AM.BaseOffs == 0; } - + return false; } @@ -1603,7 +1602,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::vector<unsigned> XCoreTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const + EVT VT) const { if (Constraint.size() != 1) return std::vector<unsigned>(); @@ -1611,9 +1610,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, switch (Constraint[0]) { default : break; case 'r': - return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2, - XCore::R3, XCore::R4, XCore::R5, - XCore::R6, XCore::R7, XCore::R8, + return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2, + XCore::R3, XCore::R4, XCore::R5, + XCore::R6, XCore::R7, XCore::R8, XCore::R9, XCore::R10, XCore::R11, 0); break; } diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h index 7e5dd2e..bb3f2cc 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h @@ -20,11 +20,11 @@ #include "XCore.h" namespace llvm { - + // Forward declarations class XCoreSubtarget; class XCoreTargetMachine; - + namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. @@ -38,16 +38,16 @@ namespace llvm { // dp relative address DPRelativeWrapper, - + // cp relative address CPRelativeWrapper, - + // Store word to stack STWSP, // Corresponds to retsp instruction RETSP, - + // Corresponds to LADD instruction LADD, @@ -74,13 +74,14 @@ namespace llvm { //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// - class XCoreTargetLowering : public TargetLowering + class XCoreTargetLowering : public TargetLowering { public: explicit XCoreTargetLowering(XCoreTargetMachine &TM); virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -91,10 +92,10 @@ namespace llvm { virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const; - /// getTargetNodeName - This method returns the name of a target specific + /// getTargetNodeName - This method returns the name of a target specific // DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const; - + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; @@ -108,7 +109,7 @@ namespace llvm { private: const XCoreTargetMachine &TM; const XCoreSubtarget &Subtarget; - + // Lower Operand helpers SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -148,12 +149,12 @@ namespace llvm { SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - + // Inline asm support std::vector<unsigned> getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - + // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td index 38cc734..ecdd4cb 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -727,7 +727,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>; -// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp, +// TODO setd, eet, eef, testwct, tinitpc, tinitdp, // tinitsp, tinitcp, tsetmr, sext (reg), zext (reg) let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in @@ -758,6 +758,14 @@ def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), "getr $dst, $type", [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>; +def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "getts $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>; + +def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "setpt res[$r], $val", + [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>; + def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "outct res[$r], $val", [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>; @@ -774,6 +782,11 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "out res[$r], $val", [(int_xcore_out GRRegs:$r, GRRegs:$val)]>; +let Constraints = "$src = $dst" in +def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), + "outshr res[$r], $src", + [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>; + def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "inct $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>; @@ -786,6 +799,11 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "in $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>; +let Constraints = "$src = $dst" in +def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), + "inshr $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>; + def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "chkct res[$r], $val", [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>; @@ -799,7 +817,7 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; // Two operand long -// TODO settw, setclk, setrdy, setpsc, endin, peek, +// TODO setclk, setrdy, setpsc, endin, peek, // getd, testlcl, tinitlr, getps, setps def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", @@ -813,13 +831,17 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "clz $dst, $src", [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; -def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, 
GRRegs:$val), "setc res[$r], $val", [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; +def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "settw res[$r], $val", + [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>; + // One operand short -// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp -// setdp, setcp, setv, setev, kcall +// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp +// setdp, setcp, setev, kcall // dgetreg let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), @@ -859,20 +881,41 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), [(XCoreBranchLink GRRegs:$addr)]>; } +def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r), + "syncr res[$r]", + [(int_xcore_syncr GRRegs:$r)]>; + def FREER_1r : _F1R<(outs), (ins GRRegs:$r), "freer res[$r]", [(int_xcore_freer GRRegs:$r)]>; +let Uses=[R11] in +def SETV_1r : _F1R<(outs), (ins GRRegs:$r), + "setv res[$r], r11", + [(int_xcore_setv GRRegs:$r, R11)]>; + +def EEU_1r : _F1R<(outs), (ins GRRegs:$r), + "eeu res[$r]", + [(int_xcore_eeu GRRegs:$r)]>; + // Zero operand short -// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, +// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp +def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>; + let Defs = [R11] in def GETID_0R : _F0R<(outs), (ins), "get r11, id", [(set R11, (int_xcore_getid))]>; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, + hasSideEffects = 1 in +def WAITEU_0R : _F0R<(outs), (ins), + "waiteu", + [(brind (int_xcore_waitevent))]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===//
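
One change threaded through every target in this update is the removal of setShiftAmountType() from the TargetLowering constructors (SystemZ, X86, XCore above) in favor of a virtual getShiftAmountTy(EVT LHSTy) override, so the shift-amount type is answered per query rather than fixed once; call sites such as getVShift now pass the shifted operand's type, e.g. TLI.getShiftAmountTy(SrcOp.getValueType()). A simplified sketch of the shape of that API change, using stand-in types rather than the real LLVM classes:

#include <cstdio>

enum class VT { i8, i32, i64 };

struct TargetLoweringBase {
  // Default mirrors targets such as PPC and XCore, which answer i32.
  virtual VT getShiftAmountTy(VT /*LHSTy*/) const { return VT::i32; }
  virtual ~TargetLoweringBase() = default;
};

// X86 always shifts by an i8 amount, whatever the shifted type is.
struct X86Lowering : TargetLoweringBase {
  VT getShiftAmountTy(VT) const override { return VT::i8; }
};

// SystemZ always uses i64 shift amounts.
struct SystemZLowering : TargetLoweringBase {
  VT getShiftAmountTy(VT) const override { return VT::i64; }
};

int main() {
  X86Lowering X86;
  SystemZLowering SystemZ;
  std::printf("x86: %d, systemz: %d\n",
              (int)X86.getShiftAmountTy(VT::i32),
              (int)SystemZ.getShiftAmountTy(VT::i32)); // x86: 0, systemz: 2
  return 0;
}

Taking the left-hand-side type as a parameter also leaves room for targets whose shift-amount width legitimately depends on the operand being shifted, which a single constructor-time setting could not express.
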