diff options
Diffstat (limited to 'contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp | 67 |
1 files changed, 27 insertions, 40 deletions
diff --git a/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp index 0cb6746..9b1d256 100644 --- a/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp @@ -45,6 +45,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -55,7 +56,6 @@ namespace { class SILoadStoreOptimizer : public MachineFunctionPass { private: - const TargetMachine *TM; const SIInstrInfo *TII; const SIRegisterInfo *TRI; MachineRegisterInfo *MRI; @@ -86,20 +86,11 @@ private: public: static char ID; - SILoadStoreOptimizer() : - MachineFunctionPass(ID), - TM(nullptr), - TII(nullptr), - TRI(nullptr), - MRI(nullptr), - LIS(nullptr) { + SILoadStoreOptimizer() + : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr), + LIS(nullptr) {} - } - - SILoadStoreOptimizer(const TargetMachine &TM_) : - MachineFunctionPass(ID), - TM(&TM_), - TII(static_cast<const SIInstrInfo*>(TM->getSubtargetImpl()->getInstrInfo())) { + SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) { initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); } @@ -222,7 +213,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( // Be careful, since the addresses could be subregisters themselves in weird // cases, like vectors of pointers. const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr); - const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0); unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg(); unsigned DestReg1 @@ -259,41 +249,28 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( DebugLoc DL = I->getDebugLoc(); MachineInstrBuilder Read2 = BuildMI(*MBB, I, DL, Read2Desc, DestReg) - .addImm(0) // gds .addOperand(*AddrReg) // addr .addImm(NewOffset0) // offset0 .addImm(NewOffset1) // offset1 - .addOperand(*M0Reg) // M0 + .addImm(0) // gds .addMemOperand(*I->memoperands_begin()) .addMemOperand(*Paired->memoperands_begin()); - LIS->InsertMachineInstrInMaps(Read2); - unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1; unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3; updateRegDefsUses(DestReg0, DestReg, SubRegIdx0); updateRegDefsUses(DestReg1, DestReg, SubRegIdx1); LIS->RemoveMachineInstrFromMaps(I); - LIS->RemoveMachineInstrFromMaps(Paired); + // Replacing Paired in the maps with Read2 allows us to avoid updating the + // live range for the m0 register. + LIS->ReplaceMachineInstrInMaps(Paired, Read2); I->eraseFromParent(); Paired->eraseFromParent(); LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg()); LIS->shrinkToUses(&AddrRegLI); - LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg()); - LIS->shrinkToUses(&M0RegLI); - - // Currently m0 is treated as a register class with one member instead of an - // implicit physical register. We are using the virtual register for the first - // one, but we still need to update the live range of the now unused second m0 - // virtual register to avoid verifier errors. - const MachineOperand *PairedM0Reg - = TII->getNamedOperand(*Paired, AMDGPU::OpName::m0); - LiveInterval &PairedM0RegLI = LIS->getInterval(PairedM0Reg->getReg()); - LIS->shrinkToUses(&PairedM0RegLI); - LIS->getInterval(DestReg); // Create new LI DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); @@ -309,7 +286,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( // Be sure to use .addOperand(), and not .addReg() with these. We want to be // sure we preserve the subregister index and any register flags set on them. const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr); - const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0); const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0); const MachineOperand *Data1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0); @@ -340,29 +316,40 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( const MCInstrDesc &Write2Desc = TII->get(Opc); DebugLoc DL = I->getDebugLoc(); + // repairLiveintervalsInRange() doesn't handle physical register, so we have + // to update the M0 range manually. + SlotIndex PairedIndex = LIS->getInstructionIndex(Paired); + LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI)); + LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex); + bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot(); + MachineInstrBuilder Write2 = BuildMI(*MBB, I, DL, Write2Desc) - .addImm(0) // gds .addOperand(*Addr) // addr .addOperand(*Data0) // data0 .addOperand(*Data1) // data1 .addImm(NewOffset0) // offset0 .addImm(NewOffset1) // offset1 - .addOperand(*M0Reg) // m0 + .addImm(0) // gds .addMemOperand(*I->memoperands_begin()) .addMemOperand(*Paired->memoperands_begin()); // XXX - How do we express subregisters here? - unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(), - M0Reg->getReg()}; + unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() }; LIS->RemoveMachineInstrFromMaps(I); LIS->RemoveMachineInstrFromMaps(Paired); I->eraseFromParent(); Paired->eraseFromParent(); + // This doesn't handle physical registers like M0 LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs); + if (UpdateM0Range) { + SlotIndex Write2Index = LIS->getInstructionIndex(Write2); + M0Segment->end = Write2Index.getRegSlot(); + } + DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n'); return Write2.getInstr(); } @@ -414,9 +401,9 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { } bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { - const TargetSubtargetInfo *STM = MF.getTarget().getSubtargetImpl(); - TRI = static_cast<const SIRegisterInfo*>(STM->getRegisterInfo()); - TII = static_cast<const SIInstrInfo*>(STM->getInstrInfo()); + const TargetSubtargetInfo &STM = MF.getSubtarget(); + TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo()); + TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo()); MRI = &MF.getRegInfo(); LIS = &getAnalysis<LiveIntervals>(); |