summary | refs | log | tree | commit | diff | stats
path: root/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp 160
1 files changed, 125 insertions, 35 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 9e0086b..6a422e7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -68,6 +68,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -82,6 +83,9 @@ using namespace llvm;
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
+
+ MachineDominatorTree *MDT;
+
public:
static char ID;
@@ -89,11 +93,11 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "SI Fix SGPR copies";
- }
+ StringRef getPassName() const override { return "SI Fix SGPR copies"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -101,8 +105,12 @@ public:
} // End anonymous namespace
-INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
- "SI Fix SGPR copies", false, false)
+// Register the pass together with the analysis it depends on. The dependency
+// named here has to match what getAnalysisUsage() requests through
+// addRequired<>(), which is MachineDominatorTree (the tree that
+// runOnMachineFunction() fetches with getAnalysis<>()).
+INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
+                      "SI Fix SGPR copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
+                    "SI Fix SGPR copies", false, false)
+
char SIFixSGPRCopies::ID = 0;
@@ -236,11 +244,94 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return true;
}
+/// Returns true if at least one incoming value of \p PHI is held in a
+/// register whose class, as reported by \p MRI, contains VGPRs according to
+/// \p TRI. \p TII is accepted for signature compatibility and is not used.
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+                               const MachineRegisterInfo &MRI,
+                               const SIRegisterInfo *TRI,
+                               const SIInstrInfo *TII) {
+  // Operand 0 is the PHI result; the incoming values sit at the odd operand
+  // indices, each one followed by its predecessor block operand.
+  const unsigned NumOps = PHI.getNumOperands();
+  unsigned Idx = 1;
+  while (Idx < NumOps) {
+    const unsigned IncomingReg = PHI.getOperand(Idx).getReg();
+    if (TRI->hasVGPRs(MRI.getRegClass(IncomingReg)))
+      return true;
+    Idx += 2;
+  }
+  return false;
+}
+/// Returns true if any incoming value of \p PHI is defined by SI_BREAK,
+/// SI_IF_BREAK or SI_ELSE_BREAK, either directly or through a chain of
+/// further PHI instructions.
+///
+/// \p Visited records every incoming register that has already been
+/// examined; registers found in it are skipped, which keeps the recursion
+/// from revisiting the same register (e.g. when PHIs feed each other).
+static bool phiHasBreakDef(const MachineInstr &PHI,
+                           const MachineRegisterInfo &MRI,
+                           SmallSet<unsigned, 8> &Visited) {
+  const unsigned NumOps = PHI.getNumOperands();
+  // Incoming values are at operands 1, 3, 5, ...
+  for (unsigned Idx = 1; Idx < NumOps; Idx += 2) {
+    const unsigned IncomingReg = PHI.getOperand(Idx).getReg();
+    if (Visited.count(IncomingReg))
+      continue;
+    Visited.insert(IncomingReg);
+
+    MachineInstr *Def = MRI.getUniqueVRegDef(IncomingReg);
+    assert(Def);
+
+    const unsigned Opc = Def->getOpcode();
+    const bool IsBreak = Opc == AMDGPU::SI_BREAK ||
+                         Opc == AMDGPU::SI_IF_BREAK ||
+                         Opc == AMDGPU::SI_ELSE_BREAK;
+    if (IsBreak)
+      return true;
+
+    // Look through PHIs: one of their incoming values may be a break def.
+    if (Opc == AMDGPU::PHI && phiHasBreakDef(*Def, MRI, Visited))
+      return true;
+  }
+  return false;
+}
+
+/// Returns true if any instruction from the first terminator of \p MBB up to
+/// the end of the block modifies the EXEC register, as determined by
+/// MachineInstr::modifiesRegister with \p TRI.
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+                                          const TargetRegisterInfo &TRI) {
+  MachineBasicBlock::const_iterator Term = MBB.getFirstTerminator();
+  const MachineBasicBlock::const_iterator End = MBB.end();
+  while (Term != End) {
+    if (Term->modifiesRegister(AMDGPU::EXEC, &TRI))
+      return true;
+    ++Term;
+  }
+  return false;
+}
+
+/// Decides whether \p Copy, whose source is defined by \p MoveImm, can be
+/// rewritten as a scalar move of the same immediate.
+///
+/// \param Copy    The copy being examined. It is rejected when its
+///                destination operand carries a sub-register index.
+/// \param MoveImm The instruction defining the copy's source. May be null
+///                (the caller passes the result of getVRegDef unchecked), in
+///                which case the fold is rejected.
+/// \param TII     Used to look up the named src0 operand of \p MoveImm.
+/// \param SMovOp  [out] Set to S_MOV_B32 for V_MOV_B32_e32 or S_MOV_B64 for
+///                V_MOV_B64_PSEUDO. Written only when the function returns
+///                true.
+/// \param Imm     [out] The immediate taken from src0 of \p MoveImm. Written
+///                only when the function returns true.
+/// \returns true if the fold is possible and the out-parameters are valid.
+static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
+                                    const MachineInstr *MoveImm,
+                                    const SIInstrInfo *TII,
+                                    unsigned &SMovOp,
+                                    int64_t &Imm) {
+
+  // A source register without a defining instruction cannot be folded.
+  if (!MoveImm || !MoveImm->isMoveImmediate())
+    return false;
+
+  // getNamedOperand hands back a pointer; guard it before dereferencing in
+  // case the move-immediate instruction has no src0 operand.
+  const MachineOperand *ImmOp =
+      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
+  if (!ImmOp || !ImmOp->isImm())
+    return false;
+
+  // FIXME: Handle copies with sub-regs.
+  if (Copy->getOperand(0).getSubReg())
+    return false;
+
+  // Only these vector moves have a scalar counterpart selected here.
+  switch (MoveImm->getOpcode()) {
+  default:
+    return false;
+  case AMDGPU::V_MOV_B32_e32:
+    SMovOp = AMDGPU::S_MOV_B32;
+    break;
+  case AMDGPU::V_MOV_B64_PSEUDO:
+    SMovOp = AMDGPU::S_MOV_B64;
+    break;
+  }
+  Imm = ImmOp->getImm();
+  return true;
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
SmallVector<MachineInstr *, 16> Worklist;
@@ -264,18 +355,40 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
- DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
+ MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ unsigned SMovOp;
+ int64_t Imm;
+ // If we are just copying an immediate, we can replace the copy with
+ // a scalar move (s_mov_b32 or s_mov_b64) of that immediate.
+ if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
+ MI.getOperand(1).ChangeToImmediate(Imm);
+ MI.addImplicitDefUseOperands(MF);
+ MI.setDesc(TII->get(SMovOp));
+ break;
+ }
TII->moveToVALU(MI);
}
break;
}
case AMDGPU::PHI: {
- DEBUG(dbgs() << "Fixing PHI: " << MI);
unsigned Reg = MI.getOperand(0).getReg();
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;
+ // We don't need to fix the PHI if the common dominator of the
+ // two incoming blocks terminates with a uniform branch.
+ if (MI.getNumExplicitOperands() == 5) {
+ MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+ MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+ MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1);
+ if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) {
+ DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
+ break;
+ }
+ }
+
// If a PHI node defines an SGPR and any of its operands are VGPRs,
// then we need to move it to the VALU.
//
@@ -302,10 +415,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// ...
// use sgpr2
//
- // FIXME: This is OK if the branching decision is made based on an
- // SGPR value.
- bool SGPRBranch = false;
-
// The one exception to this rule is when one of the operands
// is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
// instruction. In this case, we know the program will
@@ -313,31 +422,12 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// the first block (where the condition is computed), so there
// is no chance for values to be over-written.
- bool HasBreakDef = false;
- for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
- unsigned Reg = MI.getOperand(i).getReg();
- if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
- TII->moveToVALU(MI);
- break;
- }
- MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
- assert(DefInstr);
- switch(DefInstr->getOpcode()) {
-
- case AMDGPU::SI_BREAK:
- case AMDGPU::SI_IF_BREAK:
- case AMDGPU::SI_ELSE_BREAK:
- // If we see a PHI instruction that defines an SGPR, then that PHI
- // instruction has already been considered and should have
- // a *_BREAK as an operand.
- case AMDGPU::PHI:
- HasBreakDef = true;
- break;
- }
- }
-
- if (!SGPRBranch && !HasBreakDef)
+ SmallSet<unsigned, 8> Visited;
+ if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
+ !phiHasBreakDef(MI, MRI, Visited)) {
+ DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI);
+ }
break;
}
case AMDGPU::REG_SEQUENCE: {
OpenPOWER on IntegriCloud