Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp 245
1 file changed, 237 insertions(+), 8 deletions(-)
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 6a422e7..0a795c9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -68,6 +68,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -80,6 +81,11 @@ using namespace llvm;
#define DEBUG_TYPE "si-fix-sgpr-copies"
+static cl::opt<bool> EnableM0Merge(
+ "amdgpu-enable-merge-m0",
+ cl::desc("Merge and hoist M0 initializations"),
+ cl::init(false));
+
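The merge is gated off by default. A hypothetical way to exercise it from a standalone llc run (the input file name is a placeholder, not from the patch):

    llc -march=amdgcn -amdgpu-enable-merge-m0 -O1 input.ll -o output.s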
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
@@ -107,7 +113,7 @@ public:
INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
"SI Fix SGPR copies", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
"SI Fix SGPR copies", false, false)
@@ -168,6 +174,31 @@ static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}
+static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
+ const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII) {
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ auto &Src = MI.getOperand(1);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = Src.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg))
+ return false;
+
+ for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
+ const auto *UseMI = MO.getParent();
+ if (UseMI == &MI)
+ continue;
+ if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
+ UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
+ !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
+ return false;
+ }
+ // Change VGPR to SGPR destination.
+ MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
+ return true;
+}
+
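A sketch of the effect of tryChangeVGPRtoSGPRinCopy, as MIR-style pseudocode in comments (vreg numbers and the user opcode are illustrative, not from the patch):

    // Before: the COPY has an SGPR source but a VGPR-classed destination.
    //   %vreg1<def> = COPY %vreg0            ; %vreg0:SGPR_32, %vreg1:VGPR_32
    //   %vreg2<def> = S_AND_B32 %vreg1, ...  ; same-block user accepts SGPRs
    // After: every user tolerates an SGPR operand, so the destination class
    // is flipped and the value stays on the scalar side.
    //   %vreg1<def> = COPY %vreg0            ; %vreg1:SGPR_32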
// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
// SGPRx = ...
@@ -198,12 +229,19 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
if (!CopyUse.isCopy())
return false;
+ // It is illegal to have vreg inputs to a physreg defining reg_sequence.
+ if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
+ return false;
+
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
return false;
+ if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
+ return true;
+
// TODO: Could have multiple extracts?
unsigned SubReg = CopyUse.getOperand(1).getSubReg();
if (SubReg != AMDGPU::NoSubRegister)
@@ -234,8 +272,9 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
- BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
- .addOperand(MI.getOperand(I));
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
+ TmpReg)
+ .add(MI.getOperand(I));
MI.getOperand(I).setReg(TmpReg);
}
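This hunk also picks up an LLVM API rename: MachineInstrBuilder::addOperand(MO) became add(MO). In comment form (builder arguments abbreviated):

    // Older spelling:  BuildMI(...).addOperand(MI.getOperand(I));
    // Newer spelling:  BuildMI(...).add(MI.getOperand(I));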
@@ -267,8 +306,7 @@ static bool phiHasBreakDef(const MachineInstr &PHI,
Visited.insert(Reg);
- MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
- assert(DefInstr);
+ MachineInstr *DefInstr = MRI.getVRegDef(Reg);
switch (DefInstr->getOpcode()) {
default:
break;
@@ -326,6 +364,186 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
return true;
}
+template <class UnaryPredicate>
+bool searchPredecessors(const MachineBasicBlock *MBB,
+ const MachineBasicBlock *CutOff,
+ UnaryPredicate Predicate) {
+
+ if (MBB == CutOff)
+ return false;
+
+ DenseSet<const MachineBasicBlock*> Visited;
+ SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
+ MBB->pred_end());
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = Worklist.pop_back_val();
+
+ if (!Visited.insert(MBB).second)
+ continue;
+ if (MBB == CutOff)
+ continue;
+ if (Predicate(MBB))
+ return true;
+
+ Worklist.append(MBB->pred_begin(), MBB->pred_end());
+ }
+
+ return false;
+}
+
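A worked example of the CutOff semantics (hypothetical CFG, not from the patch):

    // CFG: entry -> A -> B -> C. searchPredecessors(C, /*CutOff=*/A, Pred)
    // tests Pred(B); when the walk reaches A it recognizes the cutoff and
    // neither tests A nor expands its predecessors, so entry is never
    // visited and the search fails unless Pred(B) already held.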
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI) {
+ return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+ return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
+// Checks if there is a potential path from instruction From to instruction To.
+// If CutOff is specified and lies on that path, we ignore the portion of the
+// path above CutOff and report the destination as not reachable.
+static bool isReachable(const MachineInstr *From,
+ const MachineInstr *To,
+ const MachineBasicBlock *CutOff,
+ MachineDominatorTree &MDT) {
+ // Either From's block dominates To's block, or both instructions are in the
+ // same block and From precedes To.
+ if (MDT.dominates(From, To))
+ return true;
+
+ const MachineBasicBlock *MBBFrom = From->getParent();
+ const MachineBasicBlock *MBBTo = To->getParent();
+ if (MBBFrom == MBBTo)
+ return false;
+
+ // Instructions are in different blocks, do predecessor search.
+ // We should almost never get here since we do not usually produce M0 stores
+ // other than -1.
+ return searchPredecessors(MBBTo, CutOff, [MBBFrom]
+ (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
+}
+
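A worked example (hypothetical CFG with a loop, not from the patch):

    // CFG: entry -> A -> B -> A (backedge). For From in B and To in A,
    // From does not dominate To and the blocks differ, so the predecessor
    // walk from A runs and finds B: the backedge makes To reachable.
    // Passing CutOff == B discards that only path, and isReachable
    // conservatively reports false.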
+// Hoist and merge identical SGPR initializations into a common predecessor.
+// This is intended to combine M0 initializations, but can work with any
+// SGPR. A VGPR cannot be processed since we cannot guarantee vector
+// execution.
+static bool hoistAndMergeSGPRInits(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ MachineDominatorTree &MDT) {
+ // List of inits by immediate value.
+ typedef std::map<unsigned, std::list<MachineInstr*>> InitListMap;
+ InitListMap Inits;
+ // List of clobbering instructions.
+ SmallVector<MachineInstr*, 8> Clobbers;
+ bool Changed = false;
+
+ for (auto &MI : MRI.def_instructions(Reg)) {
+ MachineOperand *Imm = nullptr;
+ for (auto &MO: MI.operands()) {
+ if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
+ (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
+ Imm = nullptr;
+ break;
+ } else if (MO.isImm())
+ Imm = &MO;
+ }
+ if (Imm)
+ Inits[Imm->getImm()].push_front(&MI);
+ else
+ Clobbers.push_back(&MI);
+ }
+
+ for (auto &Init : Inits) {
+ auto &Defs = Init.second;
+
+ for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
+ MachineInstr *MI1 = *I1;
+
+ for (auto I2 = std::next(I1); I2 != E; ) {
+ MachineInstr *MI2 = *I2;
+
+ // Check any possible interference
+ auto interferes = [&](MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To) -> bool {
+
+ assert(MDT.dominates(&*To, &*From));
+
+ auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
+ const MachineBasicBlock *MBBFrom = From->getParent();
+ const MachineBasicBlock *MBBTo = To->getParent();
+ bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
+ bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
+ if (!MayClobberFrom && !MayClobberTo)
+ return false;
+ if ((MayClobberFrom && !MayClobberTo) ||
+ (!MayClobberFrom && MayClobberTo))
+ return true;
+ // Both may be clobbered. This is not an interference only if both From
+ // and To are dominated by Clobber and belong to the same block, or if
+ // Clobber properly dominates To's block; since To dominates From here,
+ // Clobber then dominates both from a common dominator.
+ return !((MBBFrom == MBBTo &&
+ MDT.dominates(Clobber, &*From) &&
+ MDT.dominates(Clobber, &*To)) ||
+ MDT.properlyDominates(Clobber->getParent(), MBBTo));
+ };
+
+ return (any_of(Clobbers, interferes)) ||
+ (any_of(Inits, [&](InitListMap::value_type &C) {
+ return C.first != Init.first && any_of(C.second, interferes);
+ }));
+ };
+
+ if (MDT.dominates(MI1, MI2)) {
+ if (!interferes(MI2, MI1)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber()
+ << " " << *MI2);
+ MI2->eraseFromParent();
+ Defs.erase(I2++);
+ Changed = true;
+ continue;
+ }
+ } else if (MDT.dominates(MI2, MI1)) {
+ if (!interferes(MI1, MI2)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
+ << " " << *MI1);
+ MI1->eraseFromParent();
+ Defs.erase(I1++);
+ Changed = true;
+ break;
+ }
+ } else {
+ auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
+ MI2->getParent());
+ if (!MBB) {
+ ++I2;
+ continue;
+ }
+
+ MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
+ if (!interferes(MI1, I) && !interferes(MI2, I)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
+ << " " << *MI1 << "and moving from BB#"
+ << MI2->getParent()->getNumber() << " to BB#"
+ << I->getParent()->getNumber() << " " << *MI2);
+ I->getParent()->splice(I, MI2->getParent(), MI2);
+ MI1->eraseFromParent();
+ Defs.erase(I1++);
+ Changed = true;
+ break;
+ }
+ }
+ ++I2;
+ }
+ ++I1;
+ }
+ }
+
+ if (Changed)
+ MRI.clearKillFlags(Reg);
+
+ return Changed;
+}
+
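A sketch of the whole transformation on M0 (hypothetical MIR-style pseudocode in comments; block numbers and the -1 value are illustrative):

    // Before: identical inits in two sibling blocks, with nothing clobbering
    // M0 on the paths between them and their nearest common dominator bb.0.
    //   bb.1: %M0 = S_MOV_B32 -1 ...      bb.2: %M0 = S_MOV_B32 -1 ...
    // After: one init is spliced to the first non-PHI point of bb.0 and the
    // other is erased.
    //   bb.0: %M0 = S_MOV_B32 -1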
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -355,7 +573,13 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
- MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ TII->moveToVALU(MI);
+ break;
+ }
+
+ MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
unsigned SMovOp;
int64_t Imm;
// If we are just copying an immediate, we can replace the copy with
@@ -367,6 +591,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}
TII->moveToVALU(MI);
+ } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
+ tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
}
break;
@@ -382,8 +608,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
- MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1);
- if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) {
+ if (!predsHasDivergentTerminator(MBB0, TRI) &&
+ !predsHasDivergentTerminator(MBB1, TRI)) {
DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
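An illustration of why the predecessor walk is stricter than the old nearest-common-dominator test (hypothetical CFG in comments, not from the patch):

    // bb.0 branches uniformly to bb.1 and bb.4; bb.1 ends in an
    // EXEC-modifying terminator and flows to bb.2; the PHI joins bb.2 and
    // bb.4. The nearest common dominator of bb.2 and bb.4 is bb.0, whose
    // terminator is uniform, so the old test would have left the PHI on
    // SGPRs; the predecessor walk from bb.2 finds the divergent terminator
    // in bb.1 and forces the PHI to the VALU.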
@@ -458,5 +684,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
+ hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
+
return true;
}