Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp')
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp | 158
1 file changed, 90 insertions(+), 68 deletions(-)
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index bd80bb2..d5bda4a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -39,16 +39,16 @@ struct CFStack {
FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
};
- const AMDGPUSubtarget *ST;
+ const R600Subtarget *ST;
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
unsigned CurrentEntries;
unsigned CurrentSubEntries;
- CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
+ CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
- MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
+ MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
CurrentEntries(0), CurrentSubEntries(0) { }
unsigned getLoopDepth();
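Note: the constructor now keys off the function's calling convention instead of the removed ShaderType enum. CallingConv::AMDGPU_VS identifies vertex shaders, which must reserve one stack entry for the implicit CALL_FS. A minimal sketch of the new reservation rule (standalone helper for illustration, not part of the patch):

#include "llvm/IR/CallingConv.h"

// Vertex shaders (AMDGPU_VS) reserve one stack entry for CALL_FS;
// every other calling convention starts with an empty stack.
static unsigned initialMaxStackSize(llvm::CallingConv::ID CC) {
  return CC == llvm::CallingConv::AMDGPU_VS ? 1 : 0;
}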
@@ -119,7 +119,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
return 0;
case CFStack::FIRST_NON_WQM_PUSH:
assert(!ST->hasCaymanISA());
- if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
+ if (ST->getGeneration() <= R600Subtarget::R700) {
// +1 For the push operation.
// +2 Extra space required.
return 3;
@@ -132,7 +132,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
return 2;
}
case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
- assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);
// +1 For the push operation.
// +1 Extra space required.
return 2;
@@ -142,8 +142,8 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
}
void CFStack::updateMaxStackSize() {
- unsigned CurrentStackSize = CurrentEntries +
- (RoundUpToAlignment(CurrentSubEntries, 4) / 4);
+ unsigned CurrentStackSize =
+ CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}
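Note: RoundUpToAlignment was renamed to alignTo (llvm/Support/MathExtras.h) with identical semantics, so this hunk is a pure API update. The division by 4 reflects that four sub-entries share one physical stack entry, so sub-entries are rounded up to a multiple of 4 first. A worked example as a standalone helper (illustrative only):

#include "llvm/Support/MathExtras.h"

// E.g. Entries = 3, SubEntries = 5: alignTo(5, 4) == 8 and 8 / 4 == 2,
// so the current stack size is 3 + 2 == 5 physical entries.
static unsigned currentStackSize(unsigned Entries, unsigned SubEntries) {
  return Entries + (llvm::alignTo(SubEntries, 4) / 4);
}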
@@ -159,7 +159,7 @@ void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
// See comment in
// CFStack::getSubEntrySize()
else if (CurrentEntries > 0 &&
- ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
+ ST->getGeneration() > R600Subtarget::EVERGREEN &&
!ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
@@ -220,10 +220,10 @@ private:
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
unsigned MaxFetchInst;
- const AMDGPUSubtarget *ST;
+ const R600Subtarget *ST;
- bool IsTrivialInst(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+ bool IsTrivialInst(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
return true;
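Note: this hunk is part of the patch-wide MachineInstr * to MachineInstr & migration: a reference parameter documents that the instruction can never be null, and call sites dereference the iterator explicitly (*I). A minimal sketch of the pattern, using the generic isKill()/isReturn() predicates as stand-ins for the AMDGPU opcode switch:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Stand-in for IsTrivialInst: KILL and RETURN carry no work of their own.
static bool isTrivial(llvm::MachineInstr &MI) {
  return MI.isKill() || MI.isReturn();
}

static unsigned countNonTrivial(llvm::MachineBasicBlock &MBB) {
  unsigned N = 0;
  for (llvm::MachineInstr &MI : MBB) // range-for already yields references
    if (!isTrivial(MI))
      ++N;
  return N;
}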
@@ -234,7 +234,7 @@ private:
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
unsigned Opcode = 0;
- bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
@@ -278,11 +278,12 @@ private:
return TII->get(Opcode);
}
- bool isCompatibleWithClause(const MachineInstr *MI,
- std::set<unsigned> &DstRegs) const {
+ bool isCompatibleWithClause(const MachineInstr &MI,
+ std::set<unsigned> &DstRegs) const {
unsigned DstMI, SrcMI;
- for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
- E = MI->operands_end(); I != E; ++I) {
+ for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
+ E = MI.operands_end();
+ I != E; ++I) {
const MachineOperand &MO = *I;
if (!MO.isReg())
continue;
@@ -318,20 +319,20 @@ private:
MachineBasicBlock::iterator ClauseHead = I;
std::vector<MachineInstr *> ClauseContent;
unsigned AluInstCount = 0;
- bool IsTex = TII->usesTextureCache(ClauseHead);
+ bool IsTex = TII->usesTextureCache(*ClauseHead);
std::set<unsigned> DstRegs;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
- if (IsTrivialInst(I))
+ if (IsTrivialInst(*I))
continue;
if (AluInstCount >= MaxFetchInst)
break;
- if ((IsTex && !TII->usesTextureCache(I)) ||
- (!IsTex && !TII->usesVertexCache(I)))
+ if ((IsTex && !TII->usesTextureCache(*I)) ||
+ (!IsTex && !TII->usesVertexCache(*I)))
break;
- if (!isCompatibleWithClause(I, DstRegs))
+ if (!isCompatibleWithClause(*I, DstRegs))
break;
AluInstCount ++;
- ClauseContent.push_back(I);
+ ClauseContent.push_back(&*I);
}
MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
getHWInstrDesc(IsTex?CF_TC:CF_VC))
@@ -340,28 +341,37 @@ private:
return ClauseFile(MIb, std::move(ClauseContent));
}
- void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
+ void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
static const unsigned LiteralRegs[] = {
AMDGPU::ALU_LITERAL_X,
AMDGPU::ALU_LITERAL_Y,
AMDGPU::ALU_LITERAL_Z,
AMDGPU::ALU_LITERAL_W
};
- const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
TII->getSrcs(MI);
- for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
- if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
+ for (const auto &Src:Srcs) {
+ if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
continue;
- int64_t Imm = Srcs[i].second;
- std::vector<int64_t>::iterator It =
- std::find(Lits.begin(), Lits.end(), Imm);
+ int64_t Imm = Src.second;
+ std::vector<MachineOperand*>::iterator It =
+ std::find_if(Lits.begin(), Lits.end(),
+ [&](MachineOperand* val)
+ { return val->isImm() && (val->getImm() == Imm);});
+
+ // Get corresponding Operand
+ MachineOperand &Operand = MI.getOperand(
+ TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
+
if (It != Lits.end()) {
+ // Reuse existing literal reg
unsigned Index = It - Lits.begin();
- Srcs[i].first->setReg(LiteralRegs[Index]);
+ Src.first->setReg(LiteralRegs[Index]);
} else {
+ // Allocate new literal reg
assert(Lits.size() < 4 && "Too many literals in Instruction Group");
- Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
- Lits.push_back(Imm);
+ Src.first->setReg(LiteralRegs[Lits.size()]);
+ Lits.push_back(&Operand);
}
}
}
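Note: an R600 instruction group can address at most four literal slots (ALU_LITERAL_X..W), so getLiteral reuses a slot when the same value already occurs in the group. Switching the vector from int64_t to MachineOperand * lets the later LITERALS emission distinguish plain immediates from global-address literals. The allocation policy, reduced to plain integers (sketch only, assuming the four-slot limit shown above):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Return the literal slot (0..3) for Imm, reusing a slot whose value
// matches; otherwise allocate the next free one.
static unsigned literalSlot(std::vector<int64_t> &Lits, int64_t Imm) {
  auto It = std::find(Lits.begin(), Lits.end(), Imm);
  if (It != Lits.end())
    return It - Lits.begin(); // reuse the existing literal slot
  assert(Lits.size() < 4 && "Too many literals in instruction group");
  Lits.push_back(Imm);        // allocate a new slot
  return Lits.size() - 1;
}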
@@ -384,56 +394,66 @@ private:
ClauseFile
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
const {
- MachineBasicBlock::iterator ClauseHead = I;
+ MachineInstr &ClauseHead = *I;
std::vector<MachineInstr *> ClauseContent;
I++;
for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
- if (IsTrivialInst(I)) {
+ if (IsTrivialInst(*I)) {
++I;
continue;
}
if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
break;
- std::vector<int64_t> Literals;
+ std::vector<MachineOperand *>Literals;
if (I->isBundle()) {
- MachineInstr *DeleteMI = I;
+ MachineInstr &DeleteMI = *I;
MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
while (++BI != E && BI->isBundledWithPred()) {
BI->unbundleFromPred();
- for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = BI->getOperand(i);
+ for (MachineOperand &MO : BI->operands()) {
if (MO.isReg() && MO.isInternalRead())
MO.setIsInternalRead(false);
}
- getLiteral(&*BI, Literals);
+ getLiteral(*BI, Literals);
ClauseContent.push_back(&*BI);
}
I = BI;
- DeleteMI->eraseFromParent();
+ DeleteMI.eraseFromParent();
} else {
- getLiteral(I, Literals);
- ClauseContent.push_back(I);
+ getLiteral(*I, Literals);
+ ClauseContent.push_back(&*I);
I++;
}
- for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
- unsigned literal0 = Literals[i];
- unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
- MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
- TII->get(AMDGPU::LITERALS))
- .addImm(literal0)
- .addImm(literal2);
+ for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
+ MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
+ TII->get(AMDGPU::LITERALS));
+ if (Literals[i]->isImm()) {
+ MILit.addImm(Literals[i]->getImm());
+ } else {
+ MILit.addGlobalAddress(Literals[i]->getGlobal(),
+ Literals[i]->getOffset());
+ }
+ if (i + 1 < e) {
+ if (Literals[i + 1]->isImm()) {
+ MILit.addImm(Literals[i + 1]->getImm());
+ } else {
+ MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
+ Literals[i + 1]->getOffset());
+ }
+ } else
+ MILit.addImm(0);
ClauseContent.push_back(MILit);
}
}
assert(ClauseContent.size() < 128 && "ALU clause is too big");
- ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
- return ClauseFile(ClauseHead, std::move(ClauseContent));
+ ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
+ return ClauseFile(&ClauseHead, std::move(ClauseContent));
}
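Note: each LITERALS pseudo instruction carries a pair of 32-bit values, which is why the emission loop above steps by two and pads a trailing odd literal with .addImm(0). The pairing, reduced to plain integers (sketch only):

#include <cstdint>
#include <utility>
#include <vector>

// Pack literals pairwise; a trailing odd literal is padded with 0.
static std::vector<std::pair<int64_t, int64_t>>
pairLiterals(const std::vector<int64_t> &Lits) {
  std::vector<std::pair<int64_t, int64_t>> Out;
  for (unsigned i = 0, e = Lits.size(); i < e; i += 2)
    Out.push_back({Lits[i], i + 1 < e ? Lits[i + 1] : 0});
  return Out;
}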
void
EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
- CounterPropagateAddr(Clause.first, CfCount);
+ CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
.addImm(CfCount);
@@ -447,7 +467,7 @@ private:
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
Clause.first->getOperand(0).setImm(0);
- CounterPropagateAddr(Clause.first, CfCount);
+ CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
.addImm(CfCount);
@@ -457,13 +477,13 @@ private:
CfCount += Clause.second.size();
}
- void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
- MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
+ void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
+ MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
}
void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
unsigned Addr) const {
for (MachineInstr *MI : MIs) {
- CounterPropagateAddr(MI, Addr);
+ CounterPropagateAddr(*MI, Addr);
}
}
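Note: CounterPropagateAddr implements the deferred-fixup pattern used throughout this pass: control-flow instructions are first emitted with a clause-relative immediate in operand 0, and once the final position (Addr) is known the immediate is rebased in place. Equivalent standalone form (sketch, assuming operand 0 holds the relative offset as above):

#include "llvm/CodeGen/MachineInstr.h"

// Rebase the relative address stored in operand 0 by the clause's
// final position in the control-flow program.
static void rebaseAddr(llvm::MachineInstr &MI, unsigned Addr) {
  llvm::MachineOperand &Dest = MI.getOperand(0);
  Dest.setImm(Addr + Dest.getImm());
}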
@@ -472,20 +492,21 @@ public:
: MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
- ST = &MF.getSubtarget<AMDGPUSubtarget>();
+ ST = &MF.getSubtarget<R600Subtarget>();
MaxFetchInst = ST->getTexVTXClauseSize();
- TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
- TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
+ TII = ST->getInstrInfo();
+ TRI = ST->getRegisterInfo();
+
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- CFStack CFStack(ST, MFI->getShaderType());
+ CFStack CFStack(ST, MF.getFunction()->getCallingConv());
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
++MB) {
MachineBasicBlock &MBB = *MB;
unsigned CfCount = 0;
std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
std::vector<MachineInstr * > IfThenElseStack;
- if (MFI->getShaderType() == ShaderType::VERTEX) {
+ if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
getHWInstrDesc(CF_CALL_FS));
CfCount++;
@@ -493,10 +514,10 @@ public:
std::vector<ClauseFile> FetchClauses, AluClauses;
std::vector<MachineInstr *> LastAlu(1);
std::vector<MachineInstr *> ToPopAfter;
-
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
- if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
+ if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
DEBUG(dbgs() << CfCount << ":"; I->dump(););
FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++;
@@ -508,7 +529,7 @@ public:
if (MI->getOpcode() != AMDGPU::ENDIF)
LastAlu.back() = nullptr;
if (MI->getOpcode() == AMDGPU::CF_ALU)
- LastAlu.back() = MI;
+ LastAlu.back() = &*MI;
I++;
bool RequiresWorkAround =
CFStack.requiresWorkAroundForInst(MI->getOpcode());
@@ -571,7 +592,7 @@ public:
case AMDGPU::ELSE: {
MachineInstr * JumpInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
- CounterPropagateAddr(JumpInst, CfCount);
+ CounterPropagateAddr(*JumpInst, CfCount);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
@@ -595,10 +616,10 @@ public:
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
CfCount++;
}
-
+
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
- CounterPropagateAddr(IfOrElseInst, CfCount);
+ CounterPropagateAddr(*IfOrElseInst, CfCount);
IfOrElseInst->getOperand(1).setImm(1);
LastAlu.pop_back();
MI->eraseFromParent();
@@ -625,15 +646,16 @@ public:
case AMDGPU::RETURN: {
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
CfCount++;
- MI->eraseFromParent();
if (CfCount % 2) {
BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
CfCount++;
}
+ MI->eraseFromParent();
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
EmitFetchClause(I, FetchClauses[i], CfCount);
for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
EmitALUClause(I, AluClauses[i], CfCount);
+ break;
}
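Note: two fixes meet in this RETURN case: the RETURN is now erased only after the PAD is emitted, so MBB.findDebugLoc(MI) no longer inspects a dead instruction, and the added break stops the case from falling through into the default: export handling, which would otherwise call MI->getOpcode() on the just-erased instruction. The PAD keeps the control-flow program at an even instruction count; a one-line sketch of that rule (illustrative only):

// CF programs are padded to an even length; emit one PAD slot if odd.
static unsigned padToEven(unsigned CfCount) {
  return (CfCount % 2) ? CfCount + 1 : CfCount;
}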
default:
if (TII->isExport(MI->getOpcode())) {