1 files changed, 125 insertions, 35 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 9e0086b..6a422e7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -68,6 +68,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -82,6 +83,9 @@ using namespace llvm;
 namespace {
 
 class SIFixSGPRCopies : public MachineFunctionPass {
+
+  MachineDominatorTree *MDT;
+
 public:
   static char ID;
 
@@ -89,11 +93,11 @@ public:
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
-  const char *getPassName() const override {
-    return "SI Fix SGPR copies";
-  }
+  StringRef getPassName() const override { return "SI Fix SGPR copies"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -101,8 +105,12 @@ public:
 
 } // End anonymous namespace
 
-INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
-                "SI Fix SGPR copies", false, false)
+INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
+                     "SI Fix SGPR copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
+                     "SI Fix SGPR copies", false, false)
+
 
 char SIFixSGPRCopies::ID = 0;
 
@@ -236,11 +244,94 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+                               const MachineRegisterInfo &MRI,
+                               const SIRegisterInfo *TRI,
+                               const SIInstrInfo *TII) {
+
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+      return true;
+  }
+  return false;
+}
+static bool phiHasBreakDef(const MachineInstr &PHI,
+                           const MachineRegisterInfo &MRI,
+                           SmallSet<unsigned, 8> &Visited) {
+
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (Visited.count(Reg))
+      continue;
+
+    Visited.insert(Reg);
+
+    MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
+    assert(DefInstr);
+    switch (DefInstr->getOpcode()) {
+    default:
+      break;
+    case AMDGPU::SI_BREAK:
+    case AMDGPU::SI_IF_BREAK:
+    case AMDGPU::SI_ELSE_BREAK:
+      return true;
+    case AMDGPU::PHI:
+      if (phiHasBreakDef(*DefInstr, MRI, Visited))
+        return true;
+    }
+  }
+  return false;
+}
+
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+                                          const TargetRegisterInfo &TRI) {
+  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+       E = MBB.end(); I != E; ++I) {
+    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+      return true;
+  }
+  return false;
+}
+
+static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
+                                    const MachineInstr *MoveImm,
+                                    const SIInstrInfo *TII,
+                                    unsigned &SMovOp,
+                                    int64_t &Imm) {
+
+  if (!MoveImm->isMoveImmediate())
+    return false;
+
+  const MachineOperand *ImmOp =
+      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
+  if (!ImmOp->isImm())
+    return false;
+
+  // FIXME: Handle copies with sub-regs.
+  if (Copy->getOperand(0).getSubReg())
+    return false;
+
+  switch (MoveImm->getOpcode()) {
+  default:
+    return false;
+  case AMDGPU::V_MOV_B32_e32:
+    SMovOp = AMDGPU::S_MOV_B32;
+    break;
+  case AMDGPU::V_MOV_B64_PSEUDO:
+    SMovOp = AMDGPU::S_MOV_B64;
+    break;
+  }
+  Imm = ImmOp->getImm();
+  return true;
+}
+
 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
 
   SmallVector<MachineInstr *, 16> Worklist;
 
@@ -264,18 +355,40 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         const TargetRegisterClass *SrcRC, *DstRC;
         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
-          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
+          MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+          unsigned SMovOp;
+          int64_t Imm;
+          // If we are just copying an immediate, we can replace the copy with
+          // s_mov_b32.
+          if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
+            MI.getOperand(1).ChangeToImmediate(Imm);
+            MI.addImplicitDefUseOperands(MF);
+            MI.setDesc(TII->get(SMovOp));
+            break;
+          }
           TII->moveToVALU(MI);
         }
 
         break;
       }
       case AMDGPU::PHI: {
-        DEBUG(dbgs() << "Fixing PHI: " << MI);
         unsigned Reg = MI.getOperand(0).getReg();
         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
           break;
 
+        // We don't need to fix the PHI if the common dominator of the
+        // two incoming blocks terminates with a uniform branch.
+        if (MI.getNumExplicitOperands() == 5) {
+          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+          MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1);
+          if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) {
+            DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
+            break;
+          }
+        }
+
         // If a PHI node defines an SGPR and any of its operands are VGPRs,
         // then we need to move it to the VALU.
         //
@@ -302,10 +415,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         // ...
         // use sgpr2
         //
-        // FIXME: This is OK if the branching decision is made based on an
-        // SGPR value.
-        bool SGPRBranch = false;
-
         // The one exception to this rule is when one of the operands
         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
         // instruction.  In this case, there we know the program will
@@ -313,31 +422,12 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         // the first block (where the condition is computed), so there
         // is no chance for values to be over-written.
 
-        bool HasBreakDef = false;
-        for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
-          unsigned Reg = MI.getOperand(i).getReg();
-          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
-            TII->moveToVALU(MI);
-            break;
-          }
-          MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
-          assert(DefInstr);
-          switch(DefInstr->getOpcode()) {
-
-          case AMDGPU::SI_BREAK:
-          case AMDGPU::SI_IF_BREAK:
-          case AMDGPU::SI_ELSE_BREAK:
-          // If we see a PHI instruction that defines an SGPR, then that PHI
-          // instruction has already been considered and should have
-          // a *_BREAK as an operand.
-          case AMDGPU::PHI:
-            HasBreakDef = true;
-            break;
-          }
-        }
-
-        if (!SGPRBranch && !HasBreakDef)
+        SmallSet<unsigned, 8> Visited;
+        if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
+            !phiHasBreakDef(MI, MRI, Visited)) {
+          DEBUG(dbgs() << "Fixing PHI: " << MI);
           TII->moveToVALU(MI);
+        }
         break;
       }
       case AMDGPU::REG_SEQUENCE: {