1 files changed, 43 insertions, 23 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 91e4bf7..ba346d2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -1,4 +1,4 @@
-//===-- SIInsertSkips.cpp - Use predicates for control flow ----------===//
+//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,33 +12,46 @@
 /// branches when it's expected that jumping over the untaken control flow will
 /// be cheaper than having every workitem no-op through it.
 //
+//===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "si-insert-skips"
 
-namespace {
-
 static cl::opt<unsigned> SkipThresholdFlag(
   "amdgpu-skip-threshold",
   cl::desc("Number of instructions before jumping over divergent control flow"),
   cl::init(12), cl::Hidden);
 
+namespace {
+
 class SIInsertSkips : public MachineFunctionPass {
 private:
-  const SIRegisterInfo *TRI;
-  const SIInstrInfo *TII;
-  unsigned SkipThreshold;
+  const SIRegisterInfo *TRI = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  unsigned SkipThreshold = 0;
 
   bool shouldSkip(const MachineBasicBlock &From,
                   const MachineBasicBlock &To) const;
@@ -55,8 +68,7 @@ private:
 public:
   static char ID;
 
-  SIInsertSkips() :
-    MachineFunctionPass(ID), TRI(nullptr), TII(nullptr), SkipThreshold(0) { }
+  SIInsertSkips() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -69,7 +81,7 @@ public:
   }
 };
 
-} // End anonymous namespace
+} // end anonymous namespace
 
 char SIInsertSkips::ID = 0;
 
@@ -195,8 +207,8 @@ void SIInsertSkips::kill(MachineInstr &MI) {
     }
   } else {
     BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
-      .addImm(0)
-      .addOperand(Op);
+        .addImm(0)
+        .add(Op);
   }
 }
 
@@ -251,6 +263,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
        BI != BE; BI = NextBB) {
     NextBB = std::next(BI);
     MachineBasicBlock &MBB = *BI;
+    bool HaveSkipBlock = false;
 
     if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
       // Reached convergence point for last divergent branch.
@@ -270,27 +283,33 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr &MI = *I;
 
       switch (MI.getOpcode()) {
-      case AMDGPU::SI_MASK_BRANCH: {
+      case AMDGPU::SI_MASK_BRANCH:
         ExecBranchStack.push_back(MI.getOperand(0).getMBB());
         MadeChange |= skipMaskBranch(MI, MBB);
         break;
-      }
-      case AMDGPU::S_BRANCH: {
+
+      case AMDGPU::S_BRANCH:
         // Optimize out branches to the next block.
         // FIXME: Shouldn't this be handled by BranchFolding?
-        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
+        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
+          MI.eraseFromParent();
+        } else if (HaveSkipBlock) {
+          // Remove the given unconditional branch when a skip block has been
+          // inserted after the current one and let skip the two instructions
+          // performing the kill if the exec mask is non-zero.
           MI.eraseFromParent();
+        }
         break;
-      }
-      case AMDGPU::SI_KILL_TERMINATOR: {
+
+      case AMDGPU::SI_KILL_TERMINATOR:
         MadeChange = true;
         kill(MI);
 
         if (ExecBranchStack.empty()) {
           if (skipIfDead(MI, *NextBB)) {
+            HaveSkipBlock = true;
             NextBB = std::next(BI);
             BE = MF.end();
-            Next = MBB.end();
           }
         } else {
           HaveKill = true;
@@ -298,15 +317,15 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
 
         MI.eraseFromParent();
         break;
-      }
-      case AMDGPU::SI_RETURN: {
+
+      case AMDGPU::SI_RETURN_TO_EPILOG:
         // FIXME: Should move somewhere else
         assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
 
         // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
         // because external bytecode will be appended at the end.
         if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
-          // SI_RETURN is not the last instruction. Add an empty block at
+          // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
           // the end and jump there.
           if (!EmptyMBBAtEnd) {
             EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
@@ -318,7 +337,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
             .addMBB(EmptyMBBAtEnd);
           I->eraseFromParent();
         }
-      }
+        break;
+
       default:
         break;
       }