diff options
Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 519 |
1 files changed, 343 insertions, 176 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 2b7645a..d9dc5cd 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -128,45 +128,153 @@ namespace { char ARMLoadStoreOpt::ID = 0; } -static int getLoadStoreMultipleOpcode(int Opcode) { +static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Opcode) { - case ARM::LDR: + default: llvm_unreachable("Unhandled opcode!"); + case ARM::LDRi12: ++NumLDMGened; - return ARM::LDM; - case ARM::STR: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::LDMIA; + case ARM_AM::da: return ARM::LDMDA; + case ARM_AM::db: return ARM::LDMDB; + case ARM_AM::ib: return ARM::LDMIB; + } + break; + case ARM::STRi12: ++NumSTMGened; - return ARM::STM; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::STMIA; + case ARM_AM::da: return ARM::STMDA; + case ARM_AM::db: return ARM::STMDB; + case ARM_AM::ib: return ARM::STMIB; + } + break; case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; - return ARM::t2LDM; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::t2LDMIA; + case ARM_AM::db: return ARM::t2LDMDB; + } + break; case ARM::t2STRi8: case ARM::t2STRi12: ++NumSTMGened; - return ARM::t2STM; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::t2STMIA; + case ARM_AM::db: return ARM::t2STMDB; + } + break; case ARM::VLDRS: ++NumVLDMGened; - return ARM::VLDMS; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VLDMSIA; + case ARM_AM::db: return ARM::VLDMSDB; + } + break; case ARM::VSTRS: ++NumVSTMGened; - return ARM::VSTMS; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VSTMSIA; + case ARM_AM::db: return ARM::VSTMSDB; + } + break; case ARM::VLDRD: ++NumVLDMGened; - return ARM::VLDMD; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VLDMDIA; + case ARM_AM::db: return ARM::VLDMDDB; + } + break; case ARM::VSTRD: ++NumVSTMGened; - return ARM::VSTMD; - default: llvm_unreachable("Unhandled opcode!"); + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VSTMDIA; + case ARM_AM::db: return ARM::VSTMDDB; + } + break; } + return 0; } +namespace llvm { + namespace ARM_AM { + +AMSubMode getLoadStoreMultipleSubMode(int Opcode) { + switch (Opcode) { + default: llvm_unreachable("Unhandled opcode!"); + case ARM::LDMIA_RET: + case ARM::LDMIA: + case ARM::LDMIA_UPD: + case ARM::STMIA: + case ARM::STMIA_UPD: + case ARM::t2LDMIA_RET: + case ARM::t2LDMIA: + case ARM::t2LDMIA_UPD: + case ARM::t2STMIA: + case ARM::t2STMIA_UPD: + case ARM::VLDMSIA: + case ARM::VLDMSIA_UPD: + case ARM::VSTMSIA: + case ARM::VSTMSIA_UPD: + case ARM::VLDMDIA: + case ARM::VLDMDIA_UPD: + case ARM::VSTMDIA: + case ARM::VSTMDIA_UPD: + return ARM_AM::ia; + + case ARM::LDMDA: + case ARM::LDMDA_UPD: + case ARM::STMDA: + case ARM::STMDA_UPD: + return ARM_AM::da; + + case ARM::LDMDB: + case ARM::LDMDB_UPD: + case ARM::STMDB: + case ARM::STMDB_UPD: + case ARM::t2LDMDB: + case ARM::t2LDMDB_UPD: + case ARM::t2STMDB: + case ARM::t2STMDB_UPD: + case ARM::VLDMSDB: + case ARM::VLDMSDB_UPD: + case ARM::VSTMSDB: + case ARM::VSTMSDB_UPD: + case ARM::VLDMDDB: + case ARM::VLDMDDB_UPD: + case ARM::VSTMDDB: + case ARM::VSTMDDB_UPD: + return ARM_AM::db; + + case ARM::LDMIB: + case ARM::LDMIB_UPD: + case ARM::STMIB: + case ARM::STMIB_UPD: + return ARM_AM::ib; + } + + return ARM_AM::bad_am_submode; +} + + } // end namespace ARM_AM +} // end namespace llvm + static bool isT2i32Load(unsigned Opc) { return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; } static bool isi32Load(unsigned Opc) { - return Opc == ARM::LDR || isT2i32Load(Opc); + return Opc == ARM::LDRi12 || isT2i32Load(Opc); } static bool isT2i32Store(unsigned Opc) { @@ -174,7 +282,7 @@ static bool isT2i32Store(unsigned Opc) { } static bool isi32Store(unsigned Opc) { - return Opc == ARM::STR || isT2i32Store(Opc); + return Opc == ARM::STRi12 || isT2i32Store(Opc); } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -245,10 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); - Opcode = getLoadStoreMultipleOpcode(Opcode); + Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg); + .addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -271,22 +379,14 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // First calculate which of the registers should be killed by the merged // instruction. const unsigned insertPos = memOps[insertAfter].Position; - - SmallSet<unsigned, 4> UnavailRegs; SmallSet<unsigned, 4> KilledRegs; DenseMap<unsigned, unsigned> Killer; - for (unsigned i = 0; i < memOpsBegin; ++i) { - if (memOps[i].Position < insertPos && memOps[i].isKill) { - unsigned Reg = memOps[i].Reg; - if (memOps[i].Merged) - UnavailRegs.insert(Reg); - else { - KilledRegs.insert(Reg); - Killer[Reg] = i; - } + for (unsigned i = 0, e = memOps.size(); i != e; ++i) { + if (i == memOpsBegin) { + i = memOpsEnd; + if (i == e) + break; } - } - for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) { if (memOps[i].Position < insertPos && memOps[i].isKill) { unsigned Reg = memOps[i].Reg; KilledRegs.insert(Reg); @@ -297,12 +397,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, SmallVector<std::pair<unsigned, bool>, 8> Regs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; - if (UnavailRegs.count(Reg)) - // Register is killed before and it's not easy / possible to update the - // kill marker on already merged instructions. Abort. - return; - - // If we are inserting the merged operation after an unmerged operation that + // If we are inserting the merged operation after an operation that // uses the same register, make sure to transfer any kill flag. bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); @@ -318,17 +413,24 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // Merge succeeded, update records. Merges.push_back(prior(Loc)); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - // Remove kill flags from any unmerged memops that come before insertPos. + // Remove kill flags from any memops that come before insertPos. if (Regs[i-memOpsBegin].second) { unsigned Reg = Regs[i-memOpsBegin].first; if (KilledRegs.count(Reg)) { unsigned j = Killer[Reg]; - memOps[j].MBBI->getOperand(0).setIsKill(false); + int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); + assert(Idx >= 0 && "Cannot find killing operand"); + memOps[j].MBBI->getOperand(Idx).setIsKill(false); memOps[j].isKill = false; } + memOps[i].isKill = true; } MBB.erase(memOps[i].MBBI); + // Update this memop to refer to the merged instruction. + // We may need to move kill flags again. memOps[i].Merged = true; + memOps[i].MBBI = Merges.back(); + memOps[i].Position = insertPos; } } @@ -349,7 +451,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, const MachineOperand &PMO = Loc->getOperand(0); unsigned PReg = PMO.getReg(); unsigned PRegNum = PMO.isUndef() ? UINT_MAX - : ARMRegisterInfo::getRegisterNumbering(PReg); + : getARMRegisterNumbering(PReg); unsigned Count = 1; for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { @@ -357,7 +459,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); unsigned Reg = MO.getReg(); unsigned RegNum = MO.isUndef() ? UINT_MAX - : ARMRegisterInfo::getRegisterNumbering(Reg); + : getARMRegisterNumbering(Reg); // Register numbers must be in ascending order. For VFP, the registers // must also be consecutive and there is a limit of 16 double-word // registers per instruction. @@ -440,8 +542,8 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { switch (MI->getOpcode()) { default: return 0; - case ARM::LDR: - case ARM::STR: + case ARM::LDRi12: + case ARM::STRi12: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -452,31 +554,109 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::VLDRD: case ARM::VSTRD: return 8; - case ARM::LDM: - case ARM::STM: - case ARM::t2LDM: - case ARM::t2STM: - case ARM::VLDMS: - case ARM::VSTMS: - return (MI->getNumOperands() - 4) * 4; - case ARM::VLDMD: - case ARM::VSTMD: - return (MI->getNumOperands() - 4) * 8; + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2STMIA: + case ARM::t2STMDB: + case ARM::VLDMSIA: + case ARM::VLDMSDB: + case ARM::VSTMSIA: + case ARM::VSTMSDB: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; + case ARM::VLDMDIA: + case ARM::VLDMDDB: + case ARM::VSTMDIA: + case ARM::VSTMDDB: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; } } -static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) { +static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, + ARM_AM::AMSubMode Mode) { switch (Opc) { - case ARM::LDM: return ARM::LDM_UPD; - case ARM::STM: return ARM::STM_UPD; - case ARM::t2LDM: return ARM::t2LDM_UPD; - case ARM::t2STM: return ARM::t2STM_UPD; - case ARM::VLDMS: return ARM::VLDMS_UPD; - case ARM::VLDMD: return ARM::VLDMD_UPD; - case ARM::VSTMS: return ARM::VSTMS_UPD; - case ARM::VSTMD: return ARM::VSTMD_UPD; default: llvm_unreachable("Unhandled opcode!"); + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::LDMIA_UPD; + case ARM_AM::ib: return ARM::LDMIB_UPD; + case ARM_AM::da: return ARM::LDMDA_UPD; + case ARM_AM::db: return ARM::LDMDB_UPD; + } + break; + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::STMIA_UPD; + case ARM_AM::ib: return ARM::STMIB_UPD; + case ARM_AM::da: return ARM::STMDA_UPD; + case ARM_AM::db: return ARM::STMDB_UPD; + } + break; + case ARM::t2LDMIA: + case ARM::t2LDMDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::t2LDMIA_UPD; + case ARM_AM::db: return ARM::t2LDMDB_UPD; + } + break; + case ARM::t2STMIA: + case ARM::t2STMDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::t2STMIA_UPD; + case ARM_AM::db: return ARM::t2STMDB_UPD; + } + break; + case ARM::VLDMSIA: + case ARM::VLDMSDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VLDMSIA_UPD; + case ARM_AM::db: return ARM::VLDMSDB_UPD; + } + break; + case ARM::VLDMDIA: + case ARM::VLDMDDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VLDMDIA_UPD; + case ARM_AM::db: return ARM::VLDMDDB_UPD; + } + break; + case ARM::VSTMSIA: + case ARM::VSTMSDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VSTMSIA_UPD; + case ARM_AM::db: return ARM::VSTMSDB_UPD; + } + break; + case ARM::VSTMDIA: + case ARM::VSTMDDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VSTMDIA_UPD; + case ARM_AM::db: return ARM::VSTMDDB_UPD; + } + break; } + return 0; } @@ -505,16 +685,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); - bool DoMerge = false; - ARM_AM::AMSubMode Mode = ARM_AM::ia; - // Can't use an updating ld/st if the base register is also a dest // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. - for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) if (MI->getOperand(i).getReg() == Base) return false; - } - Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); + + bool DoMerge = false; + ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); @@ -560,15 +738,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (!DoMerge) return false; - unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode); + unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(Mode)) .addImm(Pred).addReg(PredReg); + // Transfer the rest of operands. - for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum) + for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); + // Transfer memoperands. (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -576,14 +755,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, return true; } -static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { +static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, + ARM_AM::AddrOpc Mode) { switch (Opc) { - case ARM::LDR: return ARM::LDR_PRE; - case ARM::STR: return ARM::STR_PRE; - case ARM::VLDRS: return ARM::VLDMS_UPD; - case ARM::VLDRD: return ARM::VLDMD_UPD; - case ARM::VSTRS: return ARM::VSTMS_UPD; - case ARM::VSTRD: return ARM::VSTMD_UPD; + case ARM::LDRi12: + return ARM::LDR_PRE; + case ARM::STRi12: + return ARM::STR_PRE; + case ARM::VLDRS: + return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; + case ARM::VLDRD: + return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; + case ARM::VSTRS: + return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; + case ARM::VSTRD: + return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_PRE; @@ -595,14 +781,21 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { return 0; } -static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { +static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, + ARM_AM::AddrOpc Mode) { switch (Opc) { - case ARM::LDR: return ARM::LDR_POST; - case ARM::STR: return ARM::STR_POST; - case ARM::VLDRS: return ARM::VLDMS_UPD; - case ARM::VLDRD: return ARM::VLDMD_UPD; - case ARM::VSTRS: return ARM::VSTMS_UPD; - case ARM::VSTRD: return ARM::VSTMD_UPD; + case ARM::LDRi12: + return ARM::LDR_POST; + case ARM::STRi12: + return ARM::STR_POST; + case ARM::VLDRS: + return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; + case ARM::VLDRD: + return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; + case ARM::VSTRS: + return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; + case ARM::VSTRD: + return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_POST; @@ -629,14 +822,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, DebugLoc dl = MI->getDebugLoc(); bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); - bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR); - if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) - return false; - if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) - return false; - if (isT2i32Load(Opcode) || isT2i32Store(Opcode)) + bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); + if (isi32Load(Opcode) || isi32Store(Opcode)) if (MI->getOperand(2).getImm() != 0) return false; + if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) + return false; bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be @@ -666,7 +857,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, DoMerge = true; } if (DoMerge) { - NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); MBB.erase(PrevMBBI); } } @@ -685,7 +876,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, DoMerge = true; } if (DoMerge) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); if (NextMBBI == I) { Advance = true; ++I; @@ -698,12 +889,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; unsigned Offset = 0; - if (isAM5) - Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ? - ARM_AM::db : ARM_AM::ia); - else if (isAM2) + if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); - else + else if (!isAM5) Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; if (isAM5) { @@ -715,7 +903,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(isLd ? BaseKill : false)) - .addImm(Offset) .addImm(Pred).addReg(PredReg) .addReg(MO.getReg(), (isLd ? getDefRegState(true) : getKillRegState(MO.isKill()))); @@ -782,15 +969,14 @@ static bool isMemoryOp(const MachineInstr *MI) { int Opcode = MI->getOpcode(); switch (Opcode) { default: break; - case ARM::LDR: - case ARM::STR: - return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; case ARM::VLDRS: case ARM::VSTRS: return MI->getOperand(1).isReg(); case ARM::VLDRD: case ARM::VSTRD: return MI->getOperand(1).isReg(); + case ARM::LDRi12: + case ARM::STRi12: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -818,24 +1004,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { static int getMemoryOpOffset(const MachineInstr *MI) { int Opcode = MI->getOpcode(); - bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; unsigned NumOperands = MI->getDesc().getNumOperands(); unsigned OffField = MI->getOperand(NumOperands-3).getImm(); if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || + Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) return OffField; - int Offset = isAM2 - ? ARM_AM::getAM2Offset(OffField) - : (isAM3 ? ARM_AM::getAM3Offset(OffField) - : ARM_AM::getAM5Offset(OffField) * 4); - if (isAM2) { - if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else if (isAM3) { + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4; + if (isAM3) { if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) Offset = -Offset; } else { @@ -847,35 +1028,24 @@ static int getMemoryOpOffset(const MachineInstr *MI) { static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - int OffImm, bool isDef, + int Offset, bool isDef, DebugLoc dl, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, - unsigned OffReg, bool OffKill, bool OffUndef, + bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, bool isT2) { - int Offset = OffImm; - if (!isT2) { - if (OffImm < 0) - Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); - else - Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); - } if (isDef) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); - if (!isT2) - MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); } else { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); - if (!isT2) - MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); } } @@ -906,23 +1076,21 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned BaseReg = BaseOp.getReg(); bool BaseKill = BaseOp.isKill(); bool BaseUndef = BaseOp.isUndef(); - unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg(); bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); int OffImm = getMemoryOpOffset(MI); unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); - if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { + if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a // ldm or stm. unsigned NewOpc = (isLd) - ? (isT2 ? ARM::t2LDM : ARM::LDM) - : (isT2 ? ARM::t2STM : ARM::STM); + ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) + : (isT2 ? ARM::t2STMIA : ARM::STMIA); if (isLd) { BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(BaseReg, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(Pred).addReg(PredReg) .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); @@ -930,7 +1098,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, } else { BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(BaseReg, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(Pred).addReg(PredReg) .addReg(EvenReg, getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) @@ -941,28 +1108,24 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, NewBBI = llvm::prior(MBBI); } else { // Split into two instructions. - assert((!isT2 || !OffReg) && - "Thumb2 ldrd / strd does not encode offset register!"); unsigned NewOpc = (isLd) - ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR) - : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR); + ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. if (isLd && (BaseKill || OffKill) && - (TRI->regsOverlap(EvenReg, BaseReg) || - (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { - assert(!TRI->regsOverlap(OddReg, BaseReg) && - (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); + (TRI->regsOverlap(EvenReg, BaseReg))) { + assert(!TRI->regsOverlap(OddReg, BaseReg)); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, false, - BaseReg, false, BaseUndef, OffReg, false, OffUndef, + BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, - BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, + BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); } else { if (OddReg == EvenReg && EvenDeadKill) { @@ -974,12 +1137,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, } InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, - BaseReg, false, BaseUndef, OffReg, false, OffUndef, + BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, OddUndef, - BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, + BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); } if (isLd) @@ -1158,17 +1321,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { return NumMerges > 0; } -namespace { - struct OffsetCompare { - bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { - int LOffset = getMemoryOpOffset(LHS); - int ROffset = getMemoryOpOffset(RHS); - assert(LHS == RHS || LOffset != ROffset); - return LOffset > ROffset; - } - }; -} - /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops /// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it /// directly restore the value of LR into pc. @@ -1182,20 +1334,25 @@ namespace { bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { if (MBB.empty()) return false; - MachineBasicBlock::iterator MBBI = prior(MBB.end()); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); if (MBBI != MBB.begin() && (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::MOVPCLR)) { MachineInstr *PrevMI = prior(MBBI); - if (PrevMI->getOpcode() == ARM::LDM_UPD || - PrevMI->getOpcode() == ARM::t2LDM_UPD) { + unsigned Opcode = PrevMI->getOpcode(); + if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || + Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || + Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); if (MO.getReg() != ARM::LR) return false; - unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET; + unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); + assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || + Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); PrevMI->setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); + PrevMI->copyImplicitOps(&*MBBI); MBB.erase(MBBI); return true; } @@ -1216,7 +1373,8 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { ++MFI) { MachineBasicBlock &MBB = *MFI; Modified |= LoadStoreMultipleOpti(MBB); - Modified |= MergeReturnIntoLDM(MBB); + if (TM.getSubtarget<ARMSubtarget>().hasV5TOps()) + Modified |= MergeReturnIntoLDM(MBB); } delete RS; @@ -1250,7 +1408,7 @@ namespace { bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc, unsigned &EvenReg, unsigned &OddReg, unsigned &BaseReg, - unsigned &OffReg, int &Offset, + int &Offset, unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2); bool RescheduleOps(MachineBasicBlock *MBB, @@ -1292,7 +1450,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, if (I->isDebugValue() || MemOps.count(&*I)) continue; const TargetInstrDesc &TID = I->getDesc(); - if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) + if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects()) return false; if (isLd && TID.mayStore()) return false; @@ -1330,8 +1488,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc, unsigned &EvenReg, unsigned &OddReg, unsigned &BaseReg, - unsigned &OffReg, int &Offset, - unsigned &PredReg, + int &Offset, unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2) { // Make sure we're allowed to generate LDRD/STRD. @@ -1341,9 +1498,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); - if (Opcode == ARM::LDR) + if (Opcode == ARM::LDRi12) NewOpc = ARM::LDRD; - else if (Opcode == ARM::STR) + else if (Opcode == ARM::STRi12) NewOpc = ARM::STRD; else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { NewOpc = ARM::t2LDRDi8; @@ -1356,12 +1513,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, } else return false; - // Make sure the offset registers match. - if (!isT2 && - (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg())) - return false; - - // Must sure the base address satisfies i64 ld / st alignment requirement. + // Make sure the base address satisfies i64 ld / st alignment requirement. if (!Op0->hasOneMemOperand() || !(*Op0->memoperands_begin())->getValue() || (*Op0->memoperands_begin())->isVolatile()) @@ -1370,7 +1522,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, unsigned Align = (*Op0->memoperands_begin())->getAlignment(); const Function *Func = MF->getFunction(); unsigned ReqAlign = STI->hasV6Ops() - ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext())) + ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) : 8; // Pre-v6 need 8-byte align if (Align < ReqAlign) return false; @@ -1404,13 +1556,22 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (EvenReg == OddReg) return false; BaseReg = Op0->getOperand(1).getReg(); - if (!isT2) - OffReg = Op0->getOperand(2).getReg(); Pred = llvm::getInstrPredicate(Op0, PredReg); dl = Op0->getDebugLoc(); return true; } +namespace { + struct OffsetCompare { + bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { + int LOffset = getMemoryOpOffset(LHS); + int ROffset = getMemoryOpOffset(RHS); + assert(LHS == RHS || LOffset != ROffset); + return LOffset > ROffset; + } + }; +} + bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, SmallVector<MachineInstr*, 4> &Ops, unsigned Base, bool isLd, @@ -1493,14 +1654,14 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MachineInstr *Op0 = Ops.back(); MachineInstr *Op1 = Ops[Ops.size()-2]; unsigned EvenReg = 0, OddReg = 0; - unsigned BaseReg = 0, OffReg = 0, PredReg = 0; + unsigned BaseReg = 0, PredReg = 0; ARMCC::CondCodes Pred = ARMCC::AL; bool isT2 = false; unsigned NewOpc = 0; int Offset = 0; DebugLoc dl; if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, - EvenReg, OddReg, BaseReg, OffReg, + EvenReg, OddReg, BaseReg, Offset, PredReg, Pred, isT2)) { Ops.pop_back(); Ops.pop_back(); @@ -1512,8 +1673,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, .addReg(EvenReg, RegState::Define) .addReg(OddReg, RegState::Define) .addReg(BaseReg); + // FIXME: We're converting from LDRi12 to an insn that still + // uses addrmode2, so we need an explicit offset reg. It should + // always by reg0 since we're transforming LDRi12s. if (!isT2) - MIB.addReg(OffReg); + MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); ++NumLDRDFormed; } else { @@ -1522,8 +1686,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, .addReg(EvenReg) .addReg(OddReg) .addReg(BaseReg); + // FIXME: We're converting from LDRi12 to an insn that still + // uses addrmode2, so we need an explicit offset reg. It should + // always by reg0 since we're transforming STRi12s. if (!isT2) - MIB.addReg(OffReg); + MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); ++NumSTRDFormed; } |