diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 160 |
1 files changed, 125 insertions, 35 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 9e0086b..6a422e7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -68,6 +68,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -82,6 +83,9 @@ using namespace llvm; namespace { class SIFixSGPRCopies : public MachineFunctionPass { + + MachineDominatorTree *MDT; + public: static char ID; @@ -89,11 +93,11 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { - return "SI Fix SGPR copies"; - } + StringRef getPassName() const override { return "SI Fix SGPR copies"; } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -101,8 +105,12 @@ public: } // End anonymous namespace -INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE, - "SI Fix SGPR copies", false, false) +INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE, + "SI Fix SGPR copies", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE, + "SI Fix SGPR copies", false, false) + char SIFixSGPRCopies::ID = 0; @@ -236,11 +244,94 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, return true; } +static bool phiHasVGPROperands(const MachineInstr &PHI, + const MachineRegisterInfo &MRI, + const SIRegisterInfo *TRI, + const SIInstrInfo *TII) { + + for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { + unsigned Reg = PHI.getOperand(i).getReg(); + if (TRI->hasVGPRs(MRI.getRegClass(Reg))) + return true; + } + return false; +} +static bool 
phiHasBreakDef(const MachineInstr &PHI, + const MachineRegisterInfo &MRI, + SmallSet<unsigned, 8> &Visited) { + + for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { + unsigned Reg = PHI.getOperand(i).getReg(); + if (Visited.count(Reg)) + continue; + + Visited.insert(Reg); + + MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg); + assert(DefInstr); + switch (DefInstr->getOpcode()) { + default: + break; + case AMDGPU::SI_BREAK: + case AMDGPU::SI_IF_BREAK: + case AMDGPU::SI_ELSE_BREAK: + return true; + case AMDGPU::PHI: + if (phiHasBreakDef(*DefInstr, MRI, Visited)) + return true; + } + } + return false; +} + +static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB, + const TargetRegisterInfo &TRI) { + for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(), + E = MBB.end(); I != E; ++I) { + if (I->modifiesRegister(AMDGPU::EXEC, &TRI)) + return true; + } + return false; +} + +static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, + const MachineInstr *MoveImm, + const SIInstrInfo *TII, + unsigned &SMovOp, + int64_t &Imm) { + + if (!MoveImm->isMoveImmediate()) + return false; + + const MachineOperand *ImmOp = + TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0); + if (!ImmOp->isImm()) + return false; + + // FIXME: Handle copies with sub-regs. 
+ if (Copy->getOperand(0).getSubReg()) + return false; + + switch (MoveImm->getOpcode()) { + default: + return false; + case AMDGPU::V_MOV_B32_e32: + SMovOp = AMDGPU::S_MOV_B32; + break; + case AMDGPU::V_MOV_B64_PSEUDO: + SMovOp = AMDGPU::S_MOV_B64; + break; + } + Imm = ImmOp->getImm(); + return true; +} + bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); MachineRegisterInfo &MRI = MF.getRegInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); SmallVector<MachineInstr *, 16> Worklist; @@ -264,18 +355,40 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *SrcRC, *DstRC; std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { - DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI); + MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + unsigned SMovOp; + int64_t Imm; + // If we are just copying an immediate, we can replace the copy with + // s_mov_b32. + if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) { + MI.getOperand(1).ChangeToImmediate(Imm); + MI.addImplicitDefUseOperands(MF); + MI.setDesc(TII->get(SMovOp)); + break; + } TII->moveToVALU(MI); } break; } case AMDGPU::PHI: { - DEBUG(dbgs() << "Fixing PHI: " << MI); unsigned Reg = MI.getOperand(0).getReg(); if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) break; + // We don't need to fix the PHI if the common dominator of the + // two incoming blocks terminates with a uniform branch. 
+ if (MI.getNumExplicitOperands() == 5) { + MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB(); + MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB(); + + MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1); + if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) { + DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n'); + break; + } + } + // If a PHI node defines an SGPR and any of its operands are VGPRs, // then we need to move it to the VALU. // @@ -302,10 +415,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { // ... // use sgpr2 // - // FIXME: This is OK if the branching decision is made based on an - // SGPR value. - bool SGPRBranch = false; - // The one exception to this rule is when one of the operands // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK // instruction. In this case, there we know the program will @@ -313,31 +422,12 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { // the first block (where the condition is computed), so there // is no chance for values to be over-written. - bool HasBreakDef = false; - for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { - unsigned Reg = MI.getOperand(i).getReg(); - if (TRI->hasVGPRs(MRI.getRegClass(Reg))) { - TII->moveToVALU(MI); - break; - } - MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg); - assert(DefInstr); - switch(DefInstr->getOpcode()) { - - case AMDGPU::SI_BREAK: - case AMDGPU::SI_IF_BREAK: - case AMDGPU::SI_ELSE_BREAK: - // If we see a PHI instruction that defines an SGPR, then that PHI - // instruction has already been considered and should have - // a *_BREAK as an operand. - case AMDGPU::PHI: - HasBreakDef = true; - break; - } - } - - if (!SGPRBranch && !HasBreakDef) + SmallSet<unsigned, 8> Visited; + if (phiHasVGPROperands(MI, MRI, TRI, TII) || + !phiHasBreakDef(MI, MRI, Visited)) { + DEBUG(dbgs() << "Fixing PHI: " << MI); TII->moveToVALU(MI); + } break; } case AMDGPU::REG_SEQUENCE: { |