Diffstat (limited to 'contrib/llvm/lib/Target')
39 files changed, 732 insertions, 152 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index c0e2235..2c887a9 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -220,27 +220,27 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { default: return None; + case AArch64::LD1i64: + case AArch64::LD2i64: + DestRegIdx = 0; + BaseRegIdx = 3; + OffsetIdx = -1; + IsPrePost = false; + break; + case AArch64::LD1i8: case AArch64::LD1i16: case AArch64::LD1i32: - case AArch64::LD1i64: case AArch64::LD2i8: case AArch64::LD2i16: case AArch64::LD2i32: - case AArch64::LD2i64: case AArch64::LD3i8: case AArch64::LD3i16: case AArch64::LD3i32: + case AArch64::LD3i64: case AArch64::LD4i8: case AArch64::LD4i16: case AArch64::LD4i32: - DestRegIdx = 0; - BaseRegIdx = 3; - OffsetIdx = -1; - IsPrePost = false; - break; - - case AArch64::LD3i64: case AArch64::LD4i64: DestRegIdx = -1; BaseRegIdx = 3; @@ -264,23 +264,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LD1Rv4s: case AArch64::LD1Rv8h: case AArch64::LD1Rv16b: - case AArch64::LD1Twov1d: - case AArch64::LD1Twov2s: - case AArch64::LD1Twov4h: - case AArch64::LD1Twov8b: - case AArch64::LD2Twov2s: - case AArch64::LD2Twov4s: - case AArch64::LD2Twov8b: - case AArch64::LD2Rv1d: - case AArch64::LD2Rv2s: - case AArch64::LD2Rv4s: - case AArch64::LD2Rv8b: DestRegIdx = 0; BaseRegIdx = 1; OffsetIdx = -1; IsPrePost = false; break; + case AArch64::LD1Twov1d: + case AArch64::LD1Twov2s: + case AArch64::LD1Twov4h: + case AArch64::LD1Twov8b: case AArch64::LD1Twov2d: case AArch64::LD1Twov4s: case AArch64::LD1Twov8h: @@ -301,10 +294,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LD1Fourv4s: case AArch64::LD1Fourv8h: case AArch64::LD1Fourv16b: + case AArch64::LD2Twov2s: + case AArch64::LD2Twov4s: + case AArch64::LD2Twov8b: case AArch64::LD2Twov2d: case AArch64::LD2Twov4h: case AArch64::LD2Twov8h: case AArch64::LD2Twov16b: + case AArch64::LD2Rv1d: + case AArch64::LD2Rv2s: + case AArch64::LD2Rv4s: + case AArch64::LD2Rv8b: case AArch64::LD2Rv2d: case AArch64::LD2Rv4h: case AArch64::LD2Rv8h: @@ -345,32 +345,32 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { IsPrePost = false; break; + case AArch64::LD1i64_POST: + case AArch64::LD2i64_POST: + DestRegIdx = 1; + BaseRegIdx = 4; + OffsetIdx = 5; + IsPrePost = true; + break; + case AArch64::LD1i8_POST: case AArch64::LD1i16_POST: case AArch64::LD1i32_POST: - case AArch64::LD1i64_POST: case AArch64::LD2i8_POST: case AArch64::LD2i16_POST: case AArch64::LD2i32_POST: - case AArch64::LD2i64_POST: case AArch64::LD3i8_POST: case AArch64::LD3i16_POST: case AArch64::LD3i32_POST: + case AArch64::LD3i64_POST: case AArch64::LD4i8_POST: case AArch64::LD4i16_POST: case AArch64::LD4i32_POST: - DestRegIdx = 1; - BaseRegIdx = 4; - OffsetIdx = 5; - IsPrePost = false; - break; - - case AArch64::LD3i64_POST: case AArch64::LD4i64_POST: DestRegIdx = -1; BaseRegIdx = 4; OffsetIdx = 5; - IsPrePost = false; + IsPrePost = true; break; case AArch64::LD1Onev1d_POST: @@ -389,23 +389,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LD1Rv4s_POST: case AArch64::LD1Rv8h_POST: case AArch64::LD1Rv16b_POST: - case AArch64::LD1Twov1d_POST: - case AArch64::LD1Twov2s_POST: - case AArch64::LD1Twov4h_POST: - case AArch64::LD1Twov8b_POST: - case AArch64::LD2Twov2s_POST: - case AArch64::LD2Twov4s_POST: - case 
AArch64::LD2Twov8b_POST: - case AArch64::LD2Rv1d_POST: - case AArch64::LD2Rv2s_POST: - case AArch64::LD2Rv4s_POST: - case AArch64::LD2Rv8b_POST: DestRegIdx = 1; BaseRegIdx = 2; OffsetIdx = 3; - IsPrePost = false; + IsPrePost = true; break; + case AArch64::LD1Twov1d_POST: + case AArch64::LD1Twov2s_POST: + case AArch64::LD1Twov4h_POST: + case AArch64::LD1Twov8b_POST: case AArch64::LD1Twov2d_POST: case AArch64::LD1Twov4s_POST: case AArch64::LD1Twov8h_POST: @@ -426,10 +419,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LD1Fourv4s_POST: case AArch64::LD1Fourv8h_POST: case AArch64::LD1Fourv16b_POST: + case AArch64::LD2Twov2s_POST: + case AArch64::LD2Twov4s_POST: + case AArch64::LD2Twov8b_POST: case AArch64::LD2Twov2d_POST: case AArch64::LD2Twov4h_POST: case AArch64::LD2Twov8h_POST: case AArch64::LD2Twov16b_POST: + case AArch64::LD2Rv1d_POST: + case AArch64::LD2Rv2s_POST: + case AArch64::LD2Rv4s_POST: + case AArch64::LD2Rv8b_POST: case AArch64::LD2Rv2d_POST: case AArch64::LD2Rv4h_POST: case AArch64::LD2Rv8h_POST: @@ -467,7 +467,7 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { DestRegIdx = -1; BaseRegIdx = 2; OffsetIdx = 3; - IsPrePost = false; + IsPrePost = true; break; case AArch64::LDRBBroW: @@ -572,8 +572,12 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { IsPrePost = true; break; - case AArch64::LDPDi: + case AArch64::LDNPDi: + case AArch64::LDNPQi: + case AArch64::LDNPSi: case AArch64::LDPQi: + case AArch64::LDPDi: + case AArch64::LDPSi: DestRegIdx = -1; BaseRegIdx = 2; OffsetIdx = 3; @@ -581,7 +585,6 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { break; case AArch64::LDPSWi: - case AArch64::LDPSi: case AArch64::LDPWi: case AArch64::LDPXi: DestRegIdx = 0; @@ -592,18 +595,18 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { case AArch64::LDPQpost: case AArch64::LDPQpre: + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: DestRegIdx = -1; BaseRegIdx = 3; OffsetIdx = 4; IsPrePost = true; break; - case AArch64::LDPDpost: - case AArch64::LDPDpre: case AArch64::LDPSWpost: case AArch64::LDPSWpre: - case AArch64::LDPSpost: - case AArch64::LDPSpre: case AArch64::LDPWpost: case AArch64::LDPWpre: case AArch64::LDPXpost: @@ -687,9 +690,14 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { if (!TII->isStridedAccess(MI)) continue; - LoadInfo LdI = *getLoadInfo(MI); - unsigned OldTag = *getTag(TRI, MI, LdI); - auto &OldCollisions = TagMap[OldTag]; + Optional<LoadInfo> OptLdI = getLoadInfo(MI); + if (!OptLdI) + continue; + LoadInfo LdI = *OptLdI; + Optional<unsigned> OptOldTag = getTag(TRI, MI, LdI); + if (!OptOldTag) + continue; + auto &OldCollisions = TagMap[*OptOldTag]; if (OldCollisions.size() <= 1) continue; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9d87988..9c57926 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9347,11 +9347,20 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) { return SDValue(); } - // Use WZR/XZR here to prevent DAGCombiner::MergeConsecutiveStores from - // undoing this transformation. - SDValue SplatVal = VT.getVectorElementType().getSizeInBits() == 32 - ? 
DAG.getRegister(AArch64::WZR, MVT::i32) - : DAG.getRegister(AArch64::XZR, MVT::i64); + // Use a CopyFromReg WZR/XZR here to prevent + // DAGCombiner::MergeConsecutiveStores from undoing this transformation. + SDLoc DL(&St); + unsigned ZeroReg; + EVT ZeroVT; + if (VT.getVectorElementType().getSizeInBits() == 32) { + ZeroReg = AArch64::WZR; + ZeroVT = MVT::i32; + } else { + ZeroReg = AArch64::XZR; + ZeroVT = MVT::i64; + } + SDValue SplatVal = + DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT); return splitStoreSplat(DAG, St, SplatVal, NumVecElts); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 5049a39..5971997 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -441,8 +441,7 @@ def MSRpstateImm1 : MSRpstateImm0_1; def MSRpstateImm4 : MSRpstateImm0_15; // The thread pointer (on Linux, at least, where this has been implemented) is -// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects. -let hasSideEffects = 0 in +// TPIDR_EL0. def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index cd9e7fb..025397b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -218,12 +218,17 @@ void GCNHazardRecognizer::RecedeCycle() { int GCNHazardRecognizer::getWaitStatesSince( function_ref<bool(MachineInstr *)> IsHazard) { - int WaitStates = -1; + int WaitStates = 0; for (MachineInstr *MI : EmittedInstrs) { + if (MI) { + if (IsHazard(MI)) + return WaitStates; + + unsigned Opcode = MI->getOpcode(); + if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF) + continue; + } ++WaitStates; - if (!MI || !IsHazard(MI)) - continue; - return WaitStates; } return std::numeric_limits<int>::max(); } diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 582153d..b24d342 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1276,6 +1276,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add 's' bit operand (always reg0 for this) .addReg(0)); + assert(Subtarget->hasV4TOps()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX) .addReg(MI->getOperand(0).getReg())); return; @@ -1896,6 +1897,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); + assert(Subtarget->hasV4TOps()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX) .addReg(ScratchReg) // Predicate. 
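For readers tracing the GCNHazardRecognizer change above: the rewritten loop reports a hazard at its distance in real wait states and no longer charges a cycle for meta instructions. The following is a simplified, self-contained sketch of that counting scheme, not the LLVM API; Instr and its IsMeta flag are stand-ins for MachineInstr and its DBG_VALUE/IMPLICIT_DEF opcodes.

    #include <functional>
    #include <limits>
    #include <vector>

    // Stand-in for MachineInstr; IsMeta marks DBG_VALUE/IMPLICIT_DEF-style
    // instructions that occupy no hardware cycle.
    struct Instr { bool IsMeta = false; };

    int waitStatesSince(const std::vector<Instr *> &Emitted,
                        const std::function<bool(Instr *)> &IsHazard) {
      int WaitStates = 0;
      for (Instr *I : Emitted) {
        if (I) {
          if (IsHazard(I))
            return WaitStates;  // hazard found after this many real slots
          if (I->IsMeta)
            continue;           // meta instructions consume no wait states
        }
        ++WaitStates;           // a real instruction, or an empty cycle slot
      }
      return std::numeric_limits<int>::max();  // no hazard in the window
    }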
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp index 051827a..a1a31e1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -251,7 +251,9 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg) const { assert(!Val == !VReg && "Return value without a vreg"); - auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL)); + auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>(); + unsigned Opcode = ST.getReturnOpcode(); + auto Ret = MIRBuilder.buildInstrNoInsert(Opcode).add(predOps(ARMCC::AL)); if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret)) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 46d8f0d..3767277 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1030,8 +1030,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (STI->isThumb()) MIB.add(predOps(ARMCC::AL)); } else if (RetOpcode == ARM::TCRETURNri) { + unsigned Opcode = + STI->isThumb() ? ARM::tTAILJMPr + : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4); BuildMI(MBB, MBBI, dl, - TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)) + TII.get(Opcode)) .addReg(JumpTarget.getReg(), RegState::Kill); } diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index bf00ef6..5dc9373 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -1332,6 +1332,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { if (AddrReg == 0) return false; unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; + assert(isThumb2 || Subtarget->hasV4TOps()); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(AddrReg)); @@ -2168,9 +2170,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); } - unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET; MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(RetOpc)); + TII.get(Subtarget->getReturnOpcode())); AddOptionalDefs(MIB); for (unsigned R : RetRegs) MIB.addReg(R, RegState::Implicit); diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 16b54e8..00b788a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -479,7 +479,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, if (DPRCSSize > 0) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. - while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) { DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI)); LastPush = MBBI++; } @@ -2397,9 +2397,8 @@ void ARMFrameLowering::adjustForSegmentedStacks( BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - // bx lr - Return from this function. - Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; - BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL)); + // Return from this function. + BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL)); // Restore SR0 and SR1 in case of __morestack() was not called. 
// pop {SR0, SR1} diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 7206083..c488cd3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -2425,7 +2425,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], (BX GPR:$dst)>, Sched<[WriteBr]>, - Requires<[IsARM]>; + Requires<[IsARM, HasV4T]>; } // Secure Monitor Call is a system instruction. @@ -5589,6 +5589,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in + def TAILJMPr4 : ARMPseudoExpand<(outs), (ins GPR:$dst), + 4, IIC_Br, [], + (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; + // Large immediate handling. // 32-bit immediate using two piece mod_imms or movw + movt. diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 7a452d4..5d57b68 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1909,6 +1909,7 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { for (auto Use : Prev->uses()) if (Use.isKill()) { + assert(STI->hasV4TOps()); BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) .addReg(Use.getReg(), RegState::Kill) .add(predOps(ARMCC::AL)) diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index e15b175..9d74953 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -729,6 +729,17 @@ public: /// True if fast-isel is used. bool useFastISel() const; + + /// Returns the correct return opcode for the current feature set. + /// Use BX if available to allow mixing thumb/arm code, but fall back + /// to plain mov pc,lr on ARMv4. 
+ unsigned getReturnOpcode() const { + if (isThumb()) + return ARM::tBX_RET; + if (hasV4TOps()) + return ARM::BX_RET; + return ARM::MOVPCLR; + } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index b8a8b1f..2ab7bfe 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -142,9 +142,9 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) { if (isThumb) { if (ARMArchFeature.empty()) - ARMArchFeature = "+thumb-mode"; + ARMArchFeature = "+thumb-mode,+v4t"; else - ARMArchFeature += ",+thumb-mode"; + ARMArchFeature += ",+thumb-mode,+v4t"; } if (TT.isOSNaCl()) { diff --git a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 540e05a..d6f85ed 100644 --- a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -583,8 +583,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) { unsigned TmpReg = 0; // 0 for no temporary register unsigned SrcReg = MI.getOperand(1).getReg(); bool SrcIsKill = MI.getOperand(1).isKill(); - OpLo = AVR::LDRdPtr; - OpHi = AVR::LDDRdPtrQ; + OpLo = AVR::LDRdPtrPi; + OpHi = AVR::LDRdPtr; TRI->splitReg(DstReg, DstLoReg, DstHiReg); // Use a temporary register if src and dst registers are the same. @@ -597,6 +597,7 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) { // Load low byte. auto MIBLO = buildMI(MBB, MBBI, OpLo) .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg, RegState::Define) .addReg(SrcReg); // Push low byte onto stack if necessary. @@ -606,8 +607,7 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) { // Load high byte. auto MIBHI = buildMI(MBB, MBBI, OpHi) .addReg(CurDstHiReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(1); + .addReg(SrcReg, getKillRegState(SrcIsKill)); if (TmpReg) { // Move the high byte into the final destination. @@ -699,7 +699,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) { OpHi = AVR::LDDRdPtrQ; TRI->splitReg(DstReg, DstLoReg, DstHiReg); - assert(Imm <= 63 && "Offset is out of range"); + // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value + // allowed for the instruction, 62 is the limit here. + assert(Imm <= 62 && "Offset is out of range"); // Use a temporary register if src and dst registers are the same. if (DstReg == SrcReg) @@ -741,7 +743,50 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) { template <> bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) { - llvm_unreachable("wide LPM is unimplemented"); + MachineInstr &MI = *MBBI; + unsigned OpLo, OpHi, DstLoReg, DstHiReg; + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned TmpReg = 0; // 0 for no temporary register + unsigned SrcReg = MI.getOperand(1).getReg(); + bool SrcIsKill = MI.getOperand(1).isKill(); + OpLo = AVR::LPMRdZPi; + OpHi = AVR::LPMRdZ; + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // Use a temporary register if src and dst registers are the same. + if (DstReg == SrcReg) + TmpReg = scavengeGPR8(MI); + + unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; + unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; + + // Load low byte. 
+ auto MIBLO = buildMI(MBB, MBBI, OpLo) + .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg); + + // Push low byte onto stack if necessary. + if (TmpReg) + buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); + + // Load high byte. + auto MIBHI = buildMI(MBB, MBBI, OpHi) + .addReg(CurDstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)); + + if (TmpReg) { + // Move the high byte into the final destination. + buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg); + + // Move the low byte from the scratch space into the final destination. + buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg); + } + + MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + + MI.eraseFromParent(); + return true; } template <> @@ -1074,7 +1119,9 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) { OpHi = AVR::STDPtrQRr; TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); - assert(Imm <= 63 && "Offset is out of range"); + // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value + // allowed for the instruction, 62 is the limit here. + assert(Imm <= 62 && "Offset is out of range"); auto MIBLO = buildMI(MBB, MBBI, OpLo) .addReg(DstReg) @@ -1104,7 +1151,9 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) { OpHi = AVR::INRdA; TRI->splitReg(DstReg, DstLoReg, DstHiReg); - assert(Imm <= 63 && "Address is out of range"); + // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value + // allowed for the instruction, 62 is the limit here. + assert(Imm <= 62 && "Address is out of range"); auto MIBLO = buildMI(MBB, MBBI, OpLo) .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) @@ -1132,7 +1181,9 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) { OpHi = AVR::OUTARr; TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg); - assert(Imm <= 63 && "Address is out of range"); + // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value + // allowed for the instruction, 62 is the limit here. + assert(Imm <= 62 && "Address is out of range"); // 16 bit I/O writes need the high byte first auto MIBHI = buildMI(MBB, MBBI, OpHi) diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp index 7d3faac..d8e8bc1 100644 --- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1469,8 +1469,10 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, } const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB->getParent()->begin(); - ++I; + + MachineFunction::iterator I; + for (I = F->begin(); I != F->end() && &(*I) != BB; ++I); + if (I != F->end()) ++I; // Create loop block. 
MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h index b44c62a..85f9552 100644 --- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h @@ -75,6 +75,11 @@ public: MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override { return MVT::i8; } + + MVT::SimpleValueType getCmpLibcallReturnType() const override { + return MVT::i8; + } + const char *getTargetNodeName(unsigned Opcode) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 744aa72..1a89a13 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -537,8 +537,7 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp, llvm_unreachable("unexpected opcode!"); case AVR::JMPk: case AVR::CALLk: - assert(BrOffset >= 0 && "offset must be absolute address"); - return isUIntN(16, BrOffset); + return true; case AVR::RCALLk: case AVR::RJMPk: return isIntN(13, BrOffset); @@ -556,5 +555,20 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp, } } +unsigned AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + const DebugLoc &DL, + int64_t BrOffset, + RegScavenger *RS) const { + // This method inserts a *direct* branch (JMP), despite its name. + // LLVM calls this method to fixup unconditional branches; it never calls + // insertBranch or some hypothetical "insertDirectBranch". + // See lib/CodeGen/BranchRelaxation.cpp for details. + // We end up here when a jump is too long for an RJMP instruction. + auto &MI = *BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB); + + return getInstSizeInBytes(MI); +} + } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h index f42d34f..eee8a92 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h @@ -107,6 +107,12 @@ public: bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; + + unsigned insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + const DebugLoc &DL, + int64_t BrOffset, + RegScavenger *RS) const override; private: const AVRRegisterInfo RI; }; diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td index 184e4d5..7d1bfc8 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1152,10 +1152,10 @@ isReMaterializable = 1 in // // Expands to: // ld Rd, P+ - // ld Rd+1, P+ + // ld Rd+1, P let Constraints = "@earlyclobber $reg" in def LDWRdPtr : Pseudo<(outs DREGS:$reg), - (ins PTRDISPREGS:$ptrreg), + (ins PTRREGS:$ptrreg), "ldw\t$reg, $ptrreg", [(set i16:$reg, (load i16:$ptrreg))]>, Requires<[HasSRAM]>; @@ -1164,7 +1164,7 @@ isReMaterializable = 1 in // Indirect loads (with postincrement or predecrement).
let mayLoad = 1, hasSideEffects = 0, -Constraints = "$ptrreg = $base_wb,@earlyclobber $reg,@earlyclobber $base_wb" in +Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in { def LDRdPtrPi : FSTLD<0, 0b01, @@ -1238,35 +1238,55 @@ isReMaterializable = 1 in Requires<[HasSRAM]>; } -class AtomicLoad<PatFrag Op, RegisterClass DRC> : - Pseudo<(outs DRC:$rd), (ins PTRREGS:$rr), "atomic_op", +class AtomicLoad<PatFrag Op, RegisterClass DRC, + RegisterClass PTRRC> : + Pseudo<(outs DRC:$rd), (ins PTRRC:$rr), "atomic_op", [(set DRC:$rd, (Op i16:$rr))]>; -class AtomicStore<PatFrag Op, RegisterClass DRC> : - Pseudo<(outs), (ins PTRDISPREGS:$rd, DRC:$rr), "atomic_op", +class AtomicStore<PatFrag Op, RegisterClass DRC, + RegisterClass PTRRC> : + Pseudo<(outs), (ins PTRRC:$rd, DRC:$rr), "atomic_op", [(Op i16:$rd, DRC:$rr)]>; -class AtomicLoadOp<PatFrag Op, RegisterClass DRC> : - Pseudo<(outs DRC:$rd), (ins PTRREGS:$rr, DRC:$operand), +class AtomicLoadOp<PatFrag Op, RegisterClass DRC, + RegisterClass PTRRC> : + Pseudo<(outs DRC:$rd), (ins PTRRC:$rr, DRC:$operand), "atomic_op", [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>; -def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8>; -def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS>; - -def AtomicStore8 : AtomicStore<atomic_store_8, GPR8>; -def AtomicStore16 : AtomicStore<atomic_store_16, DREGS>; - -def AtomicLoadAdd8 : AtomicLoadOp<atomic_load_add_8, GPR8>; -def AtomicLoadAdd16 : AtomicLoadOp<atomic_load_add_16, DREGS>; -def AtomicLoadSub8 : AtomicLoadOp<atomic_load_sub_8, GPR8>; -def AtomicLoadSub16 : AtomicLoadOp<atomic_load_sub_16, DREGS>; -def AtomicLoadAnd8 : AtomicLoadOp<atomic_load_and_8, GPR8>; -def AtomicLoadAnd16 : AtomicLoadOp<atomic_load_and_16, DREGS>; -def AtomicLoadOr8 : AtomicLoadOp<atomic_load_or_8, GPR8>; -def AtomicLoadOr16 : AtomicLoadOp<atomic_load_or_16, DREGS>; -def AtomicLoadXor8 : AtomicLoadOp<atomic_load_xor_8, GPR8>; -def AtomicLoadXor16 : AtomicLoadOp<atomic_load_xor_16, DREGS>; +// FIXME: I think 16-bit atomic binary ops need to mark +// r0 as clobbered. + +// Atomic instructions +// =================== +// +// These are all expanded by AVRExpandPseudoInsts +// +// 8-bit operations can use any pointer register because +// they are expanded directly into an LD/ST instruction. +// +// 16-bit operations use 16-bit load/store postincrement instructions, +// which require PTRDISPREGS. + +def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8, PTRREGS>; +def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS, PTRDISPREGS>; + +def AtomicStore8 : AtomicStore<atomic_store_8, GPR8, PTRREGS>; +def AtomicStore16 : AtomicStore<atomic_store_16, DREGS, PTRDISPREGS>; + +class AtomicLoadOp8<PatFrag Op> : AtomicLoadOp<Op, GPR8, PTRREGS>; +class AtomicLoadOp16<PatFrag Op> : AtomicLoadOp<Op, DREGS, PTRDISPREGS>; + +def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>; +def AtomicLoadAdd16 : AtomicLoadOp16<atomic_load_add_16>; +def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>; +def AtomicLoadSub16 : AtomicLoadOp16<atomic_load_sub_16>; +def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>; +def AtomicLoadAnd16 : AtomicLoadOp16<atomic_load_and_16>; +def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>; +def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>; +def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>; +def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>; def AtomicFence : Pseudo<(outs), (ins), "atomic_fence", [(atomic_fence imm, imm)]>; @@ -1397,6 +1417,7 @@ def STDWPtrQRr : Pseudo<(outs), // Load program memory operations. 
let canFoldAsLoad = 1, isReMaterializable = 1, +mayLoad = 1, hasSideEffects = 0 in { let Defs = [R0], @@ -1417,8 +1438,7 @@ hasSideEffects = 0 in Requires<[HasLPMX]>; // Load program memory, while postincrementing the Z register. - let mayLoad = 1, - Defs = [R31R30] in + let Defs = [R31R30] in { def LPMRdZPi : FLPMX<0, 1, diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp index 249dc55..7099b29 100644 --- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -203,7 +203,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If the offset is too big we have to adjust and restore the frame pointer // to materialize a valid load/store with displacement. //:TODO: consider using only one adiw/sbiw chain for more than one frame index - if (Offset > 63) { + if (Offset > 62) { unsigned AddOpc = AVR::ADIWRdK, SubOpc = AVR::SBIWRdK; int AddOffset = Offset - 63 + 1; diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp index a9d61ff..e698b6e 100644 --- a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp @@ -25,7 +25,7 @@ namespace llvm { -static const char *AVRDataLayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-n8"; +static const char *AVRDataLayout = "e-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"; /// Processes a CPU name. static StringRef getCPU(StringRef CPU) { diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp index a2d8c16..2b45d9a 100644 --- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp @@ -13,6 +13,8 @@ #include "AVRTargetStreamer.h" +#include "llvm/MC/MCContext.h" + namespace llvm { AVRTargetStreamer::AVRTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} @@ -20,5 +22,23 @@ AVRTargetStreamer::AVRTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} AVRTargetAsmStreamer::AVRTargetAsmStreamer(MCStreamer &S) : AVRTargetStreamer(S) {} +void AVRTargetStreamer::finish() { + MCStreamer &OS = getStreamer(); + MCContext &Context = OS.getContext(); + + MCSymbol *DoCopyData = Context.getOrCreateSymbol("__do_copy_data"); + MCSymbol *DoClearBss = Context.getOrCreateSymbol("__do_clear_bss"); + + // FIXME: We can disable __do_copy_data if there are no static RAM variables. + + OS.emitRawComment(" Declaring this symbol tells the CRT that it should"); + OS.emitRawComment("copy all variables from program memory to RAM on startup"); + OS.EmitSymbolAttribute(DoCopyData, MCSA_Global); + + OS.emitRawComment(" Declaring this symbol tells the CRT that it should"); + OS.emitRawComment("clear the zeroed data section on startup"); + OS.EmitSymbolAttribute(DoClearBss, MCSA_Global); +} + } // end namespace llvm diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h index 99a5366..815088b 100644 --- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h +++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h @@ -19,6 +19,8 @@ class MCStreamer; class AVRTargetStreamer : public MCTargetStreamer { public: explicit AVRTargetStreamer(MCStreamer &S); + + void finish() override; }; /// A target streamer for textual AVR assembly code. 
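A worked example of the displacement limits tightened in the AVR hunks above (the 63 -> 62 asserts and the Offset > 62 frame-index check): every word-sized pseudo expands into two byte accesses, at Imm and Imm + 1, and the 6-bit displacement field of LDD/STD (like the I/O address of IN/OUT) tops out at 63, so the pseudo itself can only accept up to 62. A minimal sketch under those assumptions, with a hypothetical helper name:

    #include <cassert>

    // A word access at displacement Imm expands to a low-byte access at Imm
    // and a high-byte access at Imm + 1; both must fit the 0..63 encoding.
    void checkWordDisplacement(int Imm) {
      assert(Imm >= 0 && Imm <= 62 && "high byte at Imm + 1 would not encode");
      int LoOffset = Imm;      // e.g. std Y+Imm, rLo
      int HiOffset = Imm + 1;  // e.g. std Y+Imm+1, rHi (still <= 63)
      (void)LoOffset;
      (void)HiOffset;
    }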
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp index 81b0aa7..5740b49 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -578,11 +578,15 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(LHS) .addReg(MI.getOperand(2).getReg()) .addMBB(Copy1MBB); - else + else { + int64_t imm32 = MI.getOperand(2).getImm(); + // sanity check before we build the J*_ri instruction. + assert (isInt<32>(imm32)); BuildMI(BB, DL, TII.get(NewCC)) .addReg(LHS) - .addImm(MI.getOperand(2).getImm()) + .addImm(imm32) .addMBB(Copy1MBB); + } // Copy0MBB: // %FalseValue = ... diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td index f683578..59e92f8 100644 --- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -464,7 +464,7 @@ let usesCustomInserter = 1 in { (ins GPR:$lhs, i64imm:$rhs, i64imm:$imm, GPR:$src, GPR:$src2), "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", [(set i64:$dst, - (BPFselectcc i64:$lhs, (i64 imm:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; + (BPFselectcc i64:$lhs, (i64 i64immSExt32:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; } // load 64-bit global addr into register diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index e12188e..a294004 100644 --- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -304,6 +304,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); + bool expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -2511,6 +2514,16 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::SEQIMacro: return expandSeqI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; + case Mips::MFTC0: case Mips::MTTC0: + case Mips::MFTGPR: case Mips::MTTGPR: + case Mips::MFTLO: case Mips::MTTLO: + case Mips::MFTHI: case Mips::MTTHI: + case Mips::MFTACX: case Mips::MTTACX: + case Mips::MFTDSP: case Mips::MTTDSP: + case Mips::MFTC1: case Mips::MTTC1: + case Mips::MFTHC1: case Mips::MTTHC1: + case Mips::CFTC1: case Mips::CTTC1: + return expandMXTRAlias(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; } } @@ -4882,6 +4895,212 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } +// Map the DSP accumulator and control register to the corresponding gpr +// operand. Unlike the other aliases, the m(f|t)t(lo|hi|acx) instructions +// do not map the DSP registers contiguously to gpr registers. +static unsigned getRegisterForMxtrDSP(MCInst &Inst, bool IsMFDSP) { + switch (Inst.getOpcode()) { + case Mips::MFTLO: + case Mips::MTTLO: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::ZERO; + case Mips::AC1: + return Mips::A0; + case Mips::AC2: + return Mips::T0; + case Mips::AC3: + return Mips::T4; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTHI: + case Mips::MTTHI: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::AT; + case Mips::AC1: + return Mips::A1; + case Mips::AC2: + return Mips::T1; + case Mips::AC3: + return Mips::T5; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTACX: + case Mips::MTTACX: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::V0; + case Mips::AC1: + return Mips::A2; + case Mips::AC2: + return Mips::T2; + case Mips::AC3: + return Mips::T6; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTDSP: + case Mips::MTTDSP: + return Mips::S0; + default: + llvm_unreachable("Unknown instruction for 'mttr' dsp alias!"); + } +} + +// Map the floating point register operand to the corresponding register +// operand. +static unsigned getRegisterForMxtrFP(MCInst &Inst, bool IsMFTC1) { + switch (Inst.getOperand(IsMFTC1 ? 1 : 0).getReg()) { + case Mips::F0: return Mips::ZERO; + case Mips::F1: return Mips::AT; + case Mips::F2: return Mips::V0; + case Mips::F3: return Mips::V1; + case Mips::F4: return Mips::A0; + case Mips::F5: return Mips::A1; + case Mips::F6: return Mips::A2; + case Mips::F7: return Mips::A3; + case Mips::F8: return Mips::T0; + case Mips::F9: return Mips::T1; + case Mips::F10: return Mips::T2; + case Mips::F11: return Mips::T3; + case Mips::F12: return Mips::T4; + case Mips::F13: return Mips::T5; + case Mips::F14: return Mips::T6; + case Mips::F15: return Mips::T7; + case Mips::F16: return Mips::S0; + case Mips::F17: return Mips::S1; + case Mips::F18: return Mips::S2; + case Mips::F19: return Mips::S3; + case Mips::F20: return Mips::S4; + case Mips::F21: return Mips::S5; + case Mips::F22: return Mips::S6; + case Mips::F23: return Mips::S7; + case Mips::F24: return Mips::T8; + case Mips::F25: return Mips::T9; + case Mips::F26: return Mips::K0; + case Mips::F27: return Mips::K1; + case Mips::F28: return Mips::GP; + case Mips::F29: return Mips::SP; + case Mips::F30: return Mips::FP; + case Mips::F31: return Mips::RA; + default: llvm_unreachable("Unknown register for mttc1 alias!"); + } +} + +// Map the coprocessor operand to the corresponding gpr register operand. +static unsigned getRegisterForMxtrC0(MCInst &Inst, bool IsMFTC0) { + switch (Inst.getOperand(IsMFTC0 ?
1 : 0).getReg()) { + case Mips::COP00: return Mips::ZERO; + case Mips::COP01: return Mips::AT; + case Mips::COP02: return Mips::V0; + case Mips::COP03: return Mips::V1; + case Mips::COP04: return Mips::A0; + case Mips::COP05: return Mips::A1; + case Mips::COP06: return Mips::A2; + case Mips::COP07: return Mips::A3; + case Mips::COP08: return Mips::T0; + case Mips::COP09: return Mips::T1; + case Mips::COP010: return Mips::T2; + case Mips::COP011: return Mips::T3; + case Mips::COP012: return Mips::T4; + case Mips::COP013: return Mips::T5; + case Mips::COP014: return Mips::T6; + case Mips::COP015: return Mips::T7; + case Mips::COP016: return Mips::S0; + case Mips::COP017: return Mips::S1; + case Mips::COP018: return Mips::S2; + case Mips::COP019: return Mips::S3; + case Mips::COP020: return Mips::S4; + case Mips::COP021: return Mips::S5; + case Mips::COP022: return Mips::S6; + case Mips::COP023: return Mips::S7; + case Mips::COP024: return Mips::T8; + case Mips::COP025: return Mips::T9; + case Mips::COP026: return Mips::K0; + case Mips::COP027: return Mips::K1; + case Mips::COP028: return Mips::GP; + case Mips::COP029: return Mips::SP; + case Mips::COP030: return Mips::FP; + case Mips::COP031: return Mips::RA; + default: llvm_unreachable("Unknown register for mttc0 alias!"); + } +} + +/// Expand an alias of 'mftr' or 'mttr' into the full instruction, by producing +/// an mftr or mttr with the correctly mapped gpr register, u, sel and h bits. +bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + unsigned rd = 0; + unsigned u = 1; + unsigned sel = 0; + unsigned h = 0; + bool IsMFTR = false; + switch (Inst.getOpcode()) { + case Mips::MFTC0: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTC0: + u = 0; + rd = getRegisterForMxtrC0(Inst, IsMFTR); + sel = Inst.getOperand(2).getImm(); + break; + case Mips::MFTGPR: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTGPR: + rd = Inst.getOperand(IsMFTR ? 1 : 0).getReg(); + break; + case Mips::MFTLO: + case Mips::MFTHI: + case Mips::MFTACX: + case Mips::MFTDSP: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTLO: + case Mips::MTTHI: + case Mips::MTTACX: + case Mips::MTTDSP: + rd = getRegisterForMxtrDSP(Inst, IsMFTR); + sel = 1; + break; + case Mips::MFTHC1: + h = 1; + LLVM_FALLTHROUGH; + case Mips::MFTC1: + IsMFTR = true; + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 2; + break; + case Mips::MTTHC1: + h = 1; + LLVM_FALLTHROUGH; + case Mips::MTTC1: + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 2; + break; + case Mips::CFTC1: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::CTTC1: + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 3; + break; + } + unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd; + unsigned Op1 = + IsMFTR ? rd + : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg() + : Inst.getOperand(0).getReg()); + + TOut.emitRRIII(IsMFTR ? Mips::MFTR : Mips::MTTR, Op0, Op1, u, sel, h, IDLoc, + STI); + return false; +} + unsigned MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { @@ -5793,14 +6012,21 @@ OperandMatchResultTy MipsAsmParser::parseInvNum(OperandVector &Operands) { MCAsmParser &Parser = getParser(); const MCExpr *IdVal; - // If the first token is '$' we may have register operand. - if (Parser.getTok().is(AsmToken::Dollar)) - return MatchOperand_NoMatch; + // If the first token is '$' we may have register operand. 
We have to reject + // cases where it is not a register. Complicating the matter is that + // register names are not reserved across all ABIs. + // Peek past the dollar to see if it's a register name for this ABI. SMLoc S = Parser.getTok().getLoc(); + if (Parser.getTok().is(AsmToken::Dollar)) { + return matchCPURegisterName(Parser.getLexer().peekTok().getString()) == -1 + ? MatchOperand_ParseFail + : MatchOperand_NoMatch; + } if (getParser().parseExpression(IdVal)) return MatchOperand_ParseFail; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal); - assert(MCE && "Unexpected MCExpr type."); + if (!MCE) + return MatchOperand_NoMatch; int64_t Val = MCE->getValue(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm( diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index aad6bf3..0bddba7 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -246,8 +246,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { break; case MEK_CALL_HI16: case MEK_CALL_LO16: - case MEK_DTPREL_HI: - case MEK_DTPREL_LO: case MEK_GOT: case MEK_GOT_CALL: case MEK_GOT_DISP: @@ -263,14 +261,16 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { case MEK_NEG: case MEK_PCREL_HI16: case MEK_PCREL_LO16: - case MEK_TLSLDM: // If we do have nested target-specific expressions, they will be in // a consecutive chain. if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr())) E->fixELFSymbolsInTLSFixups(Asm); break; - case MEK_GOTTPREL: + case MEK_DTPREL_HI: + case MEK_DTPREL_LO: + case MEK_TLSLDM: case MEK_TLSGD: + case MEK_GOTTPREL: case MEK_TPREL_HI: case MEK_TPREL_LO: fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 2907b77..7caeb08 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -193,6 +193,21 @@ void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); } +void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, + unsigned Reg1, int16_t Imm0, int16_t Imm1, + int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(MCOperand::createImm(Imm0)); + TmpInst.addOperand(MCOperand::createImm(Imm1)); + TmpInst.addOperand(MCOperand::createImm(Imm2)); + TmpInst.setLoc(IDLoc); + getStreamer().EmitInstruction(TmpInst, *STI); +} + void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI) { diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td index f82f82f..20c1ab5 100644 --- a/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td @@ -415,6 +415,13 @@ class BITREV_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"bitrev", int_mips_bitrev, class BPOSGE32_MM_DESC : BPOSGE32_DESC_BASE<"bposge32", brtarget_mm, NoItinerary>; +let DecoderNamespace = "MicroMipsDSP", Arch 
= "mmdsp", + AdditionalPredicates = [HasDSP, InMicroMips] in { + def LWDSP_MM : Load<"lw", DSPROpnd, null_frag, II_LW>, DspMMRel, + LW_FM_MM<0x3f>; + def SWDSP_MM : Store<"sw", DSPROpnd, null_frag, II_SW>, DspMMRel, + LW_FM_MM<0x3e>; +} // Instruction defs. // microMIPS DSP Rev 1 def ADDQ_PH_MM : DspMMRel, ADDQ_PH_MM_ENC, ADDQ_PH_DESC; diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td index c238a65..2595333 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td @@ -1284,6 +1284,12 @@ let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { def STORE_CCOND_DSP : Store<"store_ccond_dsp", DSPCC>; } +let DecoderNamespace = "MipsDSP", Arch = "dsp", + AdditionalPredicates = [HasDSP] in { + def LWDSP : Load<"lw", DSPROpnd, null_frag, II_LW>, DspMMRel, LW_FM<0x23>; + def SWDSP : Store<"sw", DSPROpnd, null_frag, II_SW>, DspMMRel, LW_FM<0x2b>; +} + // Pseudo CMP and PICK instructions. class PseudoCMP<Instruction RealInst> : PseudoDSP<(outs DSPCC:$cmp), (ins DSPROpnd:$rs, DSPROpnd:$rt), []>, diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp index ef05166..27a8597 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp @@ -107,38 +107,31 @@ bool MipsFrameLowering::hasBP(const MachineFunction &MF) const { return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF); } +// Estimate the size of the stack, including the incoming arguments. We need to +// account for register spills, local objects, reserved call frame and incoming +// arguments. This is required to determine the largest possible positive offset +// from $sp so that it can be determined if an emergency spill slot for stack +// addresses is required. uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - int64_t Offset = 0; + int64_t Size = 0; - // Iterate over fixed sized objects. + // Iterate over fixed sized objects which are incoming arguments. for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) - Offset = std::max(Offset, -MFI.getObjectOffset(I)); + if (MFI.getObjectOffset(I) > 0) + Size += MFI.getObjectSize(I); // Conservatively assume all callee-saved registers will be saved. for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { - unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); - Offset = alignTo(Offset + Size, Size); + unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); + Size = alignTo(Size + RegSize, RegSize); } - unsigned MaxAlign = MFI.getMaxAlignment(); - - // Check that MaxAlign is not zero if there is a stack object that is not a - // callee-saved spill. - assert(!MFI.getObjectIndexEnd() || MaxAlign); - - // Iterate over other objects. - for (unsigned I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) - Offset = alignTo(Offset + MFI.getObjectSize(I), MaxAlign); - - // Call frame. - if (MFI.adjustsStack() && hasReservedCallFrame(MF)) - Offset = alignTo(Offset + MFI.getMaxCallFrameSize(), - std::max(MaxAlign, getStackAlignment())); - - return alignTo(Offset, getStackAlignment()); + // Get the size of the rest of the frame objects and any possible reserved + // call frame, accounting for alignment. 
+ return Size + MFI.estimateStackSize(MF); } // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions diff --git a/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td index 64bee5b..edc0981 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td @@ -35,6 +35,8 @@ class FIELD5<bits<5> Val> { def FIELD5_1_DMT_EMT : FIELD5<0b00001>; def FIELD5_2_DMT_EMT : FIELD5<0b01111>; def FIELD5_1_2_DVPE_EVPE : FIELD5<0b00000>; +def FIELD5_MFTR : FIELD5<0b01000>; +def FIELD5_MTTR : FIELD5<0b01100>; class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst { bits<32> Inst; @@ -50,6 +52,25 @@ class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst { let Inst{2-0} = 0b001; } +class COP0_MFTTR_MT<FIELD5 Op> : MipsMTInst { + bits<32> Inst; + + bits<5> rt; + bits<5> rd; + bits<1> u; + bits<1> h; + bits<3> sel; + let Inst{31-26} = 0b010000; // COP0 + let Inst{25-21} = Op.Value; // MFMC0 + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; // rx - currently unsupported. + let Inst{5} = u; + let Inst{4} = h; + let Inst{3} = 0b0; + let Inst{2-0} = sel; +} + class SPECIAL3_MT_FORK : MipsMTInst { bits<32> Inst; diff --git a/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td index ab6693f..72e626c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td @@ -6,6 +6,13 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file describes the MIPS MT ASE as defined by MD00378 1.12. +// +// TODO: Add support for the microMIPS encodings for the MT ASE and add the +// instruction mappings. 
+// +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // MIPS MT Instruction Encodings @@ -27,6 +34,10 @@ class FORK_ENC : SPECIAL3_MT_FORK; class YIELD_ENC : SPECIAL3_MT_YIELD; +class MFTR_ENC : COP0_MFTTR_MT<FIELD5_MFTR>; + +class MTTR_ENC : COP0_MFTTR_MT<FIELD5_MTTR>; + //===----------------------------------------------------------------------===// // MIPS MT Instruction Descriptions //===----------------------------------------------------------------------===// @@ -39,6 +50,22 @@ class MT_1R_DESC_BASE<string instr_asm, InstrItinClass Itin = NoItinerary> { InstrItinClass Itinerary = Itin; } +class MFTR_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); + string AsmString = "mftr\t$rd, $rt, $u, $sel, $h"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_MFTR; +} + +class MTTR_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); + string AsmString = "mttr\t$rt, $rd, $u, $sel, $h"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_MTTR; +} + class FORK_DESC { dag OutOperandList = (outs GPR32Opnd:$rs, GPR32Opnd:$rd); dag InOperandList = (ins GPR32Opnd:$rt); @@ -79,9 +106,74 @@ let hasSideEffects = 1, isNotDuplicable = 1, def FORK : FORK_ENC, FORK_DESC, ASE_MT; def YIELD : YIELD_ENC, YIELD_DESC, ASE_MT; + + def MFTR : MFTR_ENC, MFTR_DESC, ASE_MT; + + def MTTR : MTTR_ENC, MTTR_DESC, ASE_MT; } //===----------------------------------------------------------------------===// +// MIPS MT Pseudo Instructions - used to support mtfr & mttr aliases. +//===----------------------------------------------------------------------===// +def MFTC0 : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins COP0Opnd:$rt, + uimm3:$sel), + "mftc0 $rd, $rt, $sel">, ASE_MT; + +def MFTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt, + uimm3:$sel), + "mftgpr $rd, $rt">, ASE_MT; + +def MFTLO : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mftlo $rt, $ac">, ASE_MT; + +def MFTHI : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mfthi $rt, $ac">, ASE_MT; + +def MFTACX : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mftacx $rt, $ac">, ASE_MT; + +def MFTDSP : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins), + "mftdsp $rt">, ASE_MT; + +def MFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), + "mftc1 $rt, $ft">, ASE_MT; + +def MFTHC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), + "mfthc1 $rt, $ft">, ASE_MT; + +def CFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGRCCOpnd:$ft), + "cftc1 $rt, $ft">, ASE_MT; + + +def MTTC0 : MipsAsmPseudoInst<(outs COP0Opnd:$rd), (ins GPR32Opnd:$rt, + uimm3:$sel), + "mttc0 $rt, $rd, $sel">, ASE_MT; + +def MTTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins GPR32Opnd:$rd), + "mttgpr $rd, $rt">, ASE_MT; + +def MTTLO : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mttlo $rt, $ac">, ASE_MT; + +def MTTHI : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mtthi $rt, $ac">, ASE_MT; + +def MTTACX : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mttacx $rt, $ac">, ASE_MT; + +def MTTDSP : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rt), + "mttdsp $rt">, ASE_MT; + +def MTTC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), + "mttc1 $rt, $ft">, ASE_MT; + 
+def MTTHC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), + "mtthc1 $rt, $ft">, ASE_MT; + +def CTTC1 : MipsAsmPseudoInst<(outs FGRCCOpnd:$ft), (ins GPR32Opnd:$rt), + "cttc1 $rt, $ft">, ASE_MT; + +//===----------------------------------------------------------------------===// // MIPS MT Instruction Definitions //===----------------------------------------------------------------------===// @@ -95,4 +187,22 @@ let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"evpe", (EVPE ZERO), 1>, ASE_MT; def : MipsInstAlias<"yield $rs", (YIELD ZERO, GPR32Opnd:$rs), 1>, ASE_MT; + + def : MipsInstAlias<"mftc0 $rd, $rt", (MFTC0 GPR32Opnd:$rd, COP0Opnd:$rt, 0), + 1>, ASE_MT; + + def : MipsInstAlias<"mftlo $rt", (MFTLO GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mfthi $rt", (MFTHI GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mftacx $rt", (MFTACX GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mttc0 $rd, $rt", (MTTC0 COP0Opnd:$rt, GPR32Opnd:$rd, 0), + 1>, ASE_MT; + + def : MipsInstAlias<"mttlo $rt", (MTTLO AC0, GPR32Opnd:$rt), 1>, ASE_MT; + + def : MipsInstAlias<"mtthi $rt", (MTTHI AC0, GPR32Opnd:$rt), 1>, ASE_MT; + + def : MipsInstAlias<"mttacx $rt", (MTTACX AC0, GPR32Opnd:$rt), 1>, ASE_MT; } diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 102ebb2..735461c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -894,10 +894,12 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, } // Set scavenging frame index if necessary. - uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + - estimateStackSize(MF); + uint64_t MaxSPOffset = estimateStackSize(MF); - if (isInt<16>(MaxSPOffset)) + // MSA has a minimum offset of 10 bits signed. If there is a variable + // sized object on the stack, the estimation cannot account for it. + if (isIntN(STI.hasMSA() ? 10 : 16, MaxSPOffset) && + !MF.getFrameInfo().hasVarSizedObjects()) return; const TargetRegisterClass &RC = diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index ee07479..d2c2169 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -226,6 +226,8 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SW; else if (Mips::HI64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; + else if (Mips::DSPRRegClass.hasSubClassEq(RC)) + Opc = Mips::SWDSP; // Hi, Lo are normally caller save but they are callee save // for interrupt handling. 
@@ -302,6 +304,8 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LW; else if (Mips::LO64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; + else if (Mips::DSPRRegClass.hasSubClassEq(RC)) + Opc = Mips::LWDSP; assert(Opc && "Register class not handled!"); diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td index c2947bb..8ec55ab 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td +++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td @@ -226,6 +226,7 @@ def II_MFC1 : InstrItinClass; def II_MFHC1 : InstrItinClass; def II_MFC2 : InstrItinClass; def II_MFHI_MFLO : InstrItinClass; // mfhi and mflo +def II_MFTR : InstrItinClass; def II_MOD : InstrItinClass; def II_MODU : InstrItinClass; def II_MOVE : InstrItinClass; @@ -255,6 +256,7 @@ def II_MTC1 : InstrItinClass; def II_MTHC1 : InstrItinClass; def II_MTC2 : InstrItinClass; def II_MTHI_MTLO : InstrItinClass; // mthi and mtlo +def II_MTTR : InstrItinClass; def II_MUL : InstrItinClass; def II_MUH : InstrItinClass; def II_MUHU : InstrItinClass; @@ -664,12 +666,14 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData<II_MFHC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFC2 , [InstrStage<2, [ALU]>]>, + InstrItinData<II_MFTR , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTHC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC2 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFHC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTHC1 , [InstrStage<2, [ALU]>]>, + InstrItinData<II_MTTR , [InstrStage<2, [ALU]>]>, InstrItinData<II_CACHE , [InstrStage<1, [ALU]>]>, InstrItinData<II_PREF , [InstrStage<1, [ALU]>]>, InstrItinData<II_CACHEE , [InstrStage<1, [ALU]>]>, diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td index 89cda67..e4c52a4 100644 --- a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td +++ b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td @@ -268,9 +268,11 @@ def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>; // MIPS MT instructions // ==================== -def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE]>; +def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE, II_MFTR, + II_MTTR]>; def : ItinRW<[GenericReadWriteCOP0Long], [II_YIELD]>; + def : ItinRW<[GenericWriteCOP0Short], [II_FORK]>; // MIPS32R6 and MIPS16e diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h index 7d9f99c..af24838 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h +++ b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h @@ -119,6 +119,9 @@ public: SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, + int16_t Imm1, int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI); void emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI); void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 957b46c40..607bc45 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ 
b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7026,6 +7026,18 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { return DAG.getTargetConstant(1, dl, VT); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { + if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { + // Split the pieces. + SDValue Lower = + DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32)); + SDValue Upper = + DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); + // We have to manually lower both halves so getNode doesn't try to + // reassemble the build_vector. + Lower = LowerBUILD_VECTORvXi1(Lower, DAG); + Upper = LowerBUILD_VECTORvXi1(Upper, DAG); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); + } SDValue Imm = ConvertI1VectorToInteger(Op, DAG); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, Imm); @@ -34733,6 +34745,11 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y)) return SDValue(); + // Bail out if we know that this is not really just an oversized integer. + if (peekThroughBitcasts(X).getValueType() == MVT::f128 || + peekThroughBitcasts(Y).getValueType() == MVT::f128) + return SDValue(); + // TODO: Use PXOR + PTEST for SSE4.1 or later? // TODO: Add support for AVX-512. EVT VT = SetCC->getValueType(0);
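To close, an illustration of the v64i1 BUILD_VECTOR split added above for 32-bit x86: each constant v32i1 half lowers to a 32-bit immediate mask, which fits a GPR on a 32-bit target, and concatenating the halves reproduces the full 64-bit mask. A simplified stand-alone sketch with an assumed helper name, not the actual X86 lowering code:

    #include <array>
    #include <cstdint>

    uint64_t lowerV64i1BuildVector(const std::array<bool, 64> &Ops) {
      uint32_t Lower = 0, Upper = 0;
      for (unsigned I = 0; I != 32; ++I) {
        Lower |= uint32_t(Ops[I]) << I;       // elements 0..31  -> low mask
        Upper |= uint32_t(Ops[32 + I]) << I;  // elements 32..63 -> high mask
      }
      // CONCAT_VECTORS of the two v32i1 halves amounts to placing the upper
      // mask in the high 32 bits of the result.
      return (uint64_t(Upper) << 32) | Lower;
    }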