diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 66 |
1 files changed, 43 insertions, 23 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 91e4bf7..ba346d2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -1,4 +1,4 @@ -//===-- SIInsertSkips.cpp - Use predicates for control flow ----------===// +//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===// // // The LLVM Compiler Infrastructure // @@ -12,33 +12,46 @@ /// branches when it's expected that jumping over the untaken control flow will /// be cheaper than having every workitem no-op through it. // +//===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> +#include <cstdint> +#include <iterator> using namespace llvm; #define DEBUG_TYPE "si-insert-skips" -namespace { - static cl::opt<unsigned> SkipThresholdFlag( "amdgpu-skip-threshold", cl::desc("Number of instructions before jumping over divergent control flow"), cl::init(12), cl::Hidden); +namespace { + class SIInsertSkips : public MachineFunctionPass { private: - const SIRegisterInfo *TRI; - const SIInstrInfo *TII; - unsigned SkipThreshold; + const SIRegisterInfo *TRI = nullptr; + const SIInstrInfo *TII = nullptr; + unsigned SkipThreshold = 0; bool shouldSkip(const MachineBasicBlock &From, const MachineBasicBlock &To) const; @@ -55,8 +68,7 @@ private: public: static char ID; - SIInsertSkips() : - MachineFunctionPass(ID), TRI(nullptr), TII(nullptr), SkipThreshold(0) { } + SIInsertSkips() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -69,7 +81,7 @@ public: } }; -} // End anonymous namespace +} // end anonymous namespace char SIInsertSkips::ID = 0; @@ -195,8 +207,8 @@ void SIInsertSkips::kill(MachineInstr &MI) { } } else { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32)) - .addImm(0) - .addOperand(Op); + .addImm(0) + .add(Op); } } @@ -251,6 +263,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { BI != BE; BI = NextBB) { NextBB = std::next(BI); MachineBasicBlock &MBB = *BI; + bool HaveSkipBlock = false; if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) { // Reached convergence point for last divergent branch. @@ -270,27 +283,33 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I; switch (MI.getOpcode()) { - case AMDGPU::SI_MASK_BRANCH: { + case AMDGPU::SI_MASK_BRANCH: ExecBranchStack.push_back(MI.getOperand(0).getMBB()); MadeChange |= skipMaskBranch(MI, MBB); break; - } - case AMDGPU::S_BRANCH: { + + case AMDGPU::S_BRANCH: // Optimize out branches to the next block. // FIXME: Shouldn't this be handled by BranchFolding? - if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) + if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) { + MI.eraseFromParent(); + } else if (HaveSkipBlock) { + // Remove the given unconditional branch when a skip block has been + // inserted after the current one and let skip the two instructions + // performing the kill if the exec mask is non-zero. MI.eraseFromParent(); + } break; - } - case AMDGPU::SI_KILL_TERMINATOR: { + + case AMDGPU::SI_KILL_TERMINATOR: MadeChange = true; kill(MI); if (ExecBranchStack.empty()) { if (skipIfDead(MI, *NextBB)) { + HaveSkipBlock = true; NextBB = std::next(BI); BE = MF.end(); - Next = MBB.end(); } } else { HaveKill = true; @@ -298,15 +317,15 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { MI.eraseFromParent(); break; - } - case AMDGPU::SI_RETURN: { + + case AMDGPU::SI_RETURN_TO_EPILOG: // FIXME: Should move somewhere else assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid()); // Graphics shaders returning non-void shouldn't contain S_ENDPGM, // because external bytecode will be appended at the end. if (BI != --MF.end() || I != MBB.getFirstTerminator()) { - // SI_RETURN is not the last instruction. Add an empty block at + // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at // the end and jump there. if (!EmptyMBBAtEnd) { EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); @@ -318,7 +337,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { .addMBB(EmptyMBBAtEnd); I->eraseFromParent(); } - } + break; + default: break; } |