diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 50 |
1 files changed, 21 insertions, 29 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 1bdb1f0..9e972a5 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -26,7 +26,7 @@ // // - Live interval recomputing seems inefficient. This currently only matches // one pair, and recomputes live intervals and moves on to the next pair. It -// would be better to compute a list of all merges that need to occur +// would be better to compute a list of all merges that need to occur. // // - With a list of instructions to process, we can also merge more. If a // cluster of loads have offsets that are too large to fit in the 8-bit @@ -36,6 +36,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -61,7 +62,6 @@ private: MachineRegisterInfo *MRI; LiveIntervals *LIS; - static bool offsetsCanBeCombined(unsigned Offset0, unsigned Offset1, unsigned EltSize); @@ -69,10 +69,6 @@ private: MachineBasicBlock::iterator findMatchingDSInst(MachineBasicBlock::iterator I, unsigned EltSize); - void updateRegDefsUses(unsigned SrcReg, - unsigned DstReg, - unsigned SubIdx); - MachineBasicBlock::iterator mergeRead2Pair( MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, @@ -193,17 +189,6 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I, return E; } -void SILoadStoreOptimizer::updateRegDefsUses(unsigned SrcReg, - unsigned DstReg, - unsigned SubIdx) { - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg), - E = MRI->reg_end(); I != E; ) { - MachineOperand &O = *I; - ++I; - O.substVirtReg(DstReg, SubIdx, *TRI); - } -} - MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, @@ -268,19 +253,19 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( .addOperand(*Dest1) .addReg(DestReg, RegState::Kill, SubRegIdx1); - LIS->InsertMachineInstrInMaps(Read2); + LIS->InsertMachineInstrInMaps(*Read2); // repairLiveintervalsInRange() doesn't handle physical register, so we have // to update the M0 range manually. - SlotIndex PairedIndex = LIS->getInstructionIndex(Paired); + SlotIndex PairedIndex = LIS->getInstructionIndex(*Paired); LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI)); LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex); bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot(); // The new write to the original destination register is now the copy. Steal // the old SlotIndex. - LIS->ReplaceMachineInstrInMaps(I, Copy0); - LIS->ReplaceMachineInstrInMaps(Paired, Copy1); + LIS->ReplaceMachineInstrInMaps(*I, *Copy0); + LIS->ReplaceMachineInstrInMaps(*Paired, *Copy1); I->eraseFromParent(); Paired->eraseFromParent(); @@ -291,7 +276,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( LIS->createAndComputeVirtRegInterval(DestReg); if (UpdateM0Range) { - SlotIndex Read2Index = LIS->getInstructionIndex(Read2); + SlotIndex Read2Index = LIS->getInstructionIndex(*Read2); M0Segment->end = Read2Index.getRegSlot(); } @@ -340,7 +325,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( // repairLiveintervalsInRange() doesn't handle physical register, so we have // to update the M0 range manually. - SlotIndex PairedIndex = LIS->getInstructionIndex(Paired); + SlotIndex PairedIndex = LIS->getInstructionIndex(*Paired); LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI)); LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex); bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot(); @@ -359,8 +344,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( // XXX - How do we express subregisters here? unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() }; - LIS->RemoveMachineInstrFromMaps(I); - LIS->RemoveMachineInstrFromMaps(Paired); + LIS->RemoveMachineInstrFromMaps(*I); + LIS->RemoveMachineInstrFromMaps(*Paired); I->eraseFromParent(); Paired->eraseFromParent(); @@ -368,7 +353,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs); if (UpdateM0Range) { - SlotIndex Write2Index = LIS->getInstructionIndex(Write2); + SlotIndex Write2Index = LIS->getInstructionIndex(*Write2); M0Segment->end = Write2Index.getRegSlot(); } @@ -423,9 +408,16 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { } bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { - const TargetSubtargetInfo &STM = MF.getSubtarget(); - TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo()); - TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo()); + if (skipFunction(*MF.getFunction())) + return false; + + const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); + if (!STM.loadStoreOptEnabled()) + return false; + + TII = STM.getInstrInfo(); + TRI = &TII->getRegisterInfo(); + MRI = &MF.getRegInfo(); LIS = &getAnalysis<LiveIntervals>(); |