diff options
Diffstat (limited to 'lib/Target/ARM/ARMLoadStoreOptimizer.cpp')
-rw-r--r-- | lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 194 |
1 files changed, 122 insertions, 72 deletions
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8585c1e..f80e316 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -74,11 +74,14 @@ namespace { private: struct MemOpQueueEntry { int Offset; + unsigned Reg; + bool isKill; unsigned Position; MachineBasicBlock::iterator MBBI; bool Merged; - MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) - : Offset(o), Position(p), MBBI(i), Merged(false) {} + MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, + MachineBasicBlock::iterator i) + : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -128,30 +131,30 @@ namespace { static int getLoadStoreMultipleOpcode(int Opcode) { switch (Opcode) { case ARM::LDR: - NumLDMGened++; + ++NumLDMGened; return ARM::LDM; case ARM::STR: - NumSTMGened++; + ++NumSTMGened; return ARM::STM; case ARM::t2LDRi8: case ARM::t2LDRi12: - NumLDMGened++; + ++NumLDMGened; return ARM::t2LDM; case ARM::t2STRi8: case ARM::t2STRi12: - NumSTMGened++; + ++NumSTMGened; return ARM::t2STM; case ARM::VLDRS: - NumVLDMGened++; + ++NumVLDMGened; return ARM::VLDMS; case ARM::VSTRS: - NumVSTMGened++; + ++NumVSTMGened; return ARM::VSTMS; case ARM::VLDRD: - NumVLDMGened++; + ++NumVLDMGened; return ARM::VLDMD; case ARM::VSTRD: - NumVSTMGened++; + ++NumVSTMGened; return ARM::VSTMD; default: llvm_unreachable("Unhandled opcode!"); } @@ -264,45 +267,59 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on // success. -void ARMLoadStoreOpt:: -MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &memOps, - unsigned memOpsBegin, - unsigned memOpsEnd, - unsigned insertAfter, - int Offset, - unsigned Base, - bool BaseKill, - int Opcode, - ARMCC::CondCodes Pred, - unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVector<MachineBasicBlock::iterator, 4> &Merges) { +void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, + MemOpQueue &memOps, + unsigned memOpsBegin, unsigned memOpsEnd, + unsigned insertAfter, int Offset, + unsigned Base, bool BaseKill, + int Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, + DebugLoc dl, + SmallVector<MachineBasicBlock::iterator, 4> &Merges) { // First calculate which of the registers should be killed by the merged // instruction. - SmallVector<std::pair<unsigned, bool>, 8> Regs; const unsigned insertPos = memOps[insertAfter].Position; + + SmallSet<unsigned, 4> UnavailRegs; + SmallSet<unsigned, 4> KilledRegs; + DenseMap<unsigned, unsigned> Killer; + for (unsigned i = 0; i < memOpsBegin; ++i) { + if (memOps[i].Position < insertPos && memOps[i].isKill) { + unsigned Reg = memOps[i].Reg; + if (memOps[i].Merged) + UnavailRegs.insert(Reg); + else { + KilledRegs.insert(Reg); + Killer[Reg] = i; + } + } + } + for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) { + if (memOps[i].Position < insertPos && memOps[i].isKill) { + unsigned Reg = memOps[i].Reg; + KilledRegs.insert(Reg); + Killer[Reg] = i; + } + } + + SmallVector<std::pair<unsigned, bool>, 8> Regs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - const MachineOperand &MO = memOps[i].MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - bool isKill = MO.isKill(); + unsigned Reg = memOps[i].Reg; + if (UnavailRegs.count(Reg)) + // Register is killed before and it's not easy / possible to update the + // kill marker on already merged instructions. Abort. + return; // If we are inserting the merged operation after an unmerged operation that // uses the same register, make sure to transfer any kill flag. - for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j) - if (memOps[j].Position<insertPos) { - const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); - if (MOJ.getReg() == Reg && MOJ.isKill()) - isKill = true; - } - + bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; - Loc++; + ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Regs)) return; @@ -311,13 +328,13 @@ MergeOpsUpdate(MachineBasicBlock &MBB, Merges.push_back(prior(Loc)); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { // Remove kill flags from any unmerged memops that come before insertPos. - if (Regs[i-memOpsBegin].second) - for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j) - if (memOps[j].Position<insertPos) { - MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); - if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill()) - MOJ.setIsKill(false); - } + if (Regs[i-memOpsBegin].second) { + unsigned Reg = Regs[i-memOpsBegin].first; + if (KilledRegs.count(Reg)) { + unsigned j = Killer[Reg]; + memOps[j].MBBI->getOperand(0).setIsKill(false); + } + } MBB.erase(memOps[i].MBBI); memOps[i].Merged = true; } @@ -517,8 +534,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } // Try merging with the previous instruction. - if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + if (MBBI != BeginMBBI) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) + --PrevMBBI; if (isAM4) { if (Mode == ARM_AM::ia && isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { @@ -541,8 +561,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } // Try merging with the next instruction. - if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (!DoMerge && MBBI != EndMBBI) { MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; if (isAM4) { if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { @@ -669,8 +692,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); // Try merging with the previous instruction. - if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + if (MBBI != BeginMBBI) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) + --PrevMBBI; if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; AddSub = ARM_AM::sub; @@ -685,8 +711,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, } // Try merging with the next instruction. - if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (!DoMerge && MBBI != EndMBBI) { MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; if (!isAM5 && isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; @@ -759,18 +788,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, /// isMemoryOp - Returns true if instruction is a memory operations (that this /// pass is capable of operating on). static bool isMemoryOp(const MachineInstr *MI) { - if (MI->hasOneMemOperand()) { - const MachineMemOperand *MMO = *MI->memoperands_begin(); + // When no memory operands are present, conservatively assume unaligned, + // volatile, unfoldable. + if (!MI->hasOneMemOperand()) + return false; - // Don't touch volatile memory accesses - we may be changing their order. - if (MMO->isVolatile()) - return false; + const MachineMemOperand *MMO = *MI->memoperands_begin(); - // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is - // not. - if (MMO->getAlignment() < 4) - return false; - } + // Don't touch volatile memory accesses - we may be changing their order. + if (MMO->isVolatile()) + return false; + + // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is + // not. + if (MMO->getAlignment() < 4) + return false; // str <undef> could probably be eliminated entirely, but for now we just want // to avoid making a mess of it. @@ -898,6 +930,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) return false; + MachineBasicBlock::iterator NewBBI = MBBI; bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; bool EvenDeadKill = isLd ? @@ -942,6 +975,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } + NewBBI = llvm::prior(MBBI); } else { // Split into two instructions. assert((!isT2 || !OffReg) && @@ -962,14 +996,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, OffReg, false, OffUndef, Pred, PredReg, TII, isT2); + NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, Pred, PredReg, TII, isT2); } else { if (OddReg == EvenReg && EvenDeadKill) { - // If the two source operands are the same, the kill marker is probably - // on the first one. e.g. + // If the two source operands are the same, the kill marker is + // probably on the first one. e.g. // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 EvenDeadKill = false; OddDeadKill = true; @@ -978,6 +1013,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, OffReg, false, OffUndef, Pred, PredReg, TII, isT2); + NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, @@ -989,8 +1025,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, ++NumSTRD2STR; } - MBBI = prior(MBBI); MBB.erase(MI); + MBBI = NewBBI; + return true; } return false; } @@ -1023,6 +1060,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (isMemOp) { int Opcode = MBBI->getOpcode(); unsigned Size = getLSMultipleTransferSize(MBBI); + const MachineOperand &MO = MBBI->getOperand(0); + unsigned Reg = MO.getReg(); + bool isKill = MO.isDef() ? false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); @@ -1044,8 +1084,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); + ++NumMemOps; Advance = true; } else { if (Clobber) { @@ -1057,15 +1097,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; } else { for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; break; } else if (Offset == I->Offset) { @@ -1078,7 +1120,12 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } } - if (Advance) { + if (MBBI->isDebugValue()) { + ++MBBI; + if (MBBI == E) + // Reach the end of the block, try merging the memory instructions. + TryMerge = true; + } else if (Advance) { ++Position; ++MBBI; if (MBBI == E) @@ -1279,7 +1326,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, // some day. SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { - if (MemOps.count(&*I)) + if (I->isDebugValue() || MemOps.count(&*I)) continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) @@ -1411,7 +1458,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, std::sort(Ops.begin(), Ops.end(), OffsetCompare()); // The loads / stores of the same base are in order. Scan them from first to - // last and check for the followins: + // last and check for the following: // 1. Any def of base. // 2. Any gaps. while (Ops.size() > 1) { @@ -1474,7 +1521,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; - while (InsertPos != MBB->end() && MemOps.count(InsertPos)) + while (InsertPos != MBB->end() + && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) ++InsertPos; // If we are moving a pair of loads / stores, see if it makes sense @@ -1562,7 +1610,9 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { break; } - MI2LocMap[MI] = Loc++; + if (!MI->isDebugValue()) + MI2LocMap[MI] = ++Loc; + if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; |