summary | refs | log | tree | commit | diff | stats
path: root/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
diff options
context:
space:
mode:
author: dim <dim@FreeBSD.org> 2015-09-21 22:34:16 +0000
committer: dim <dim@FreeBSD.org> 2015-09-21 22:34:16 +0000
commit: fb090a675ae78b4b2524b69e42790a8308637cde (patch)
tree: 8a3ab060bcc6d1bc334343abfeb6e7315e61753a /contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
parent: 4512ff331cc292f4ec66a980cca5d03dd3c7473a (diff)
download: FreeBSD-src-fb090a675ae78b4b2524b69e42790a8308637cde.zip
download: FreeBSD-src-fb090a675ae78b4b2524b69e42790a8308637cde.tar.gz
The R600 target got renamed to AMDGPU, but I missed deleting the old
directory during the vendor import. Delete it now.
Diffstat (limited to 'contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp')
-rw-r--r-- contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp 679
1 files changed, 0 insertions, 679 deletions
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
deleted file mode 100644
index c8f37f6..0000000
--- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ /dev/null
@@ -1,679 +0,0 @@
-//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass compute turns all control flow pseudo instructions into native one
-/// computing their address on the fly ; it also sets STACK_SIZE info.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Debug.h"
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "r600cf"
-
-namespace {
-
-struct CFStack {
-
- enum StackItem {
- ENTRY = 0,
- SUB_ENTRY = 1,
- FIRST_NON_WQM_PUSH = 2,
- FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
- };
-
- const AMDGPUSubtarget *ST;
- std::vector<StackItem> BranchStack;
- std::vector<StackItem> LoopStack;
- unsigned MaxStackSize;
- unsigned CurrentEntries;
- unsigned CurrentSubEntries;
-
- CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
- // We need to reserve a stack entry for CALL_FS in vertex shaders.
- MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
- CurrentEntries(0), CurrentSubEntries(0) { }
-
- unsigned getLoopDepth();
- bool branchStackContains(CFStack::StackItem);
- bool requiresWorkAroundForInst(unsigned Opcode);
- unsigned getSubEntrySize(CFStack::StackItem Item);
- void updateMaxStackSize();
- void pushBranch(unsigned Opcode, bool isWQM = false);
- void pushLoop();
- void popBranch();
- void popLoop();
-};
-
-unsigned CFStack::getLoopDepth() {
- return LoopStack.size();
-}
-
-bool CFStack::branchStackContains(CFStack::StackItem Item) {
- for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
- E = BranchStack.end(); I != E; ++I) {
- if (*I == Item)
- return true;
- }
- return false;
-}
-
-bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
- if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
- getLoopDepth() > 1)
- return true;
-
- if (!ST->hasCFAluBug())
- return false;
-
- switch(Opcode) {
- default: return false;
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- case AMDGPU::CF_ALU_ELSE_AFTER:
- case AMDGPU::CF_ALU_BREAK:
- case AMDGPU::CF_ALU_CONTINUE:
- if (CurrentSubEntries == 0)
- return false;
- if (ST->getWavefrontSize() == 64) {
- // We are being conservative here. We only require this work-around if
- // CurrentSubEntries > 3 &&
- // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
- //
- // We have to be conservative, because we don't know for certain that
- // our stack allocation algorithm for Evergreen/NI is correct. Applying this
- // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
- // resources without any problems.
- return CurrentSubEntries > 3;
- } else {
- assert(ST->getWavefrontSize() == 32);
- // We are being conservative here. We only require the work-around if
- // CurrentSubEntries > 7 &&
- // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
- // See the comment on the wavefront size == 64 case for why we are
- // being conservative.
- return CurrentSubEntries > 7;
- }
- }
-}
-
-unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
- switch(Item) {
- default:
- return 0;
- case CFStack::FIRST_NON_WQM_PUSH:
- assert(!ST->hasCaymanISA());
- if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
- // +1 For the push operation.
- // +2 Extra space required.
- return 3;
- } else {
- // Some documentation says that this is not necessary on Evergreen,
- // but experimentation has show that we need to allocate 1 extra
- // sub-entry for the first non-WQM push.
- // +1 For the push operation.
- // +1 Extra space required.
- return 2;
- }
- case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
- assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
- // +1 For the push operation.
- // +1 Extra space required.
- return 2;
- case CFStack::SUB_ENTRY:
- return 1;
- }
-}
-
-void CFStack::updateMaxStackSize() {
- unsigned CurrentStackSize = CurrentEntries +
- (RoundUpToAlignment(CurrentSubEntries, 4) / 4);
- MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
-}
-
-void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
- CFStack::StackItem Item = CFStack::ENTRY;
- switch(Opcode) {
- case AMDGPU::CF_PUSH_EG:
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- if (!isWQM) {
- if (!ST->hasCaymanISA() &&
- !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
- Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
- // See comment in
- // CFStack::getSubEntrySize()
- else if (CurrentEntries > 0 &&
- ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
- !ST->hasCaymanISA() &&
- !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
- Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
- else
- Item = CFStack::SUB_ENTRY;
- } else
- Item = CFStack::ENTRY;
- break;
- }
- BranchStack.push_back(Item);
- if (Item == CFStack::ENTRY)
- CurrentEntries++;
- else
- CurrentSubEntries += getSubEntrySize(Item);
- updateMaxStackSize();
-}
-
-void CFStack::pushLoop() {
- LoopStack.push_back(CFStack::ENTRY);
- CurrentEntries++;
- updateMaxStackSize();
-}
-
-void CFStack::popBranch() {
- CFStack::StackItem Top = BranchStack.back();
- if (Top == CFStack::ENTRY)
- CurrentEntries--;
- else
- CurrentSubEntries-= getSubEntrySize(Top);
- BranchStack.pop_back();
-}
-
-void CFStack::popLoop() {
- CurrentEntries--;
- LoopStack.pop_back();
-}
-
-class R600ControlFlowFinalizer : public MachineFunctionPass {
-
-private:
- typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
-
- enum ControlFlowInstruction {
- CF_TC,
- CF_VC,
- CF_CALL_FS,
- CF_WHILE_LOOP,
- CF_END_LOOP,
- CF_LOOP_BREAK,
- CF_LOOP_CONTINUE,
- CF_JUMP,
- CF_ELSE,
- CF_POP,
- CF_END
- };
-
- static char ID;
- const R600InstrInfo *TII;
- const R600RegisterInfo *TRI;
- unsigned MaxFetchInst;
- const AMDGPUSubtarget *ST;
-
- bool IsTrivialInst(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- case AMDGPU::KILL:
- case AMDGPU::RETURN:
- return true;
- default:
- return false;
- }
- }
-
- const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
- unsigned Opcode = 0;
- bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
- switch (CFI) {
- case CF_TC:
- Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
- break;
- case CF_VC:
- Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
- break;
- case CF_CALL_FS:
- Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
- break;
- case CF_WHILE_LOOP:
- Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
- break;
- case CF_END_LOOP:
- Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
- break;
- case CF_LOOP_BREAK:
- Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
- break;
- case CF_LOOP_CONTINUE:
- Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
- break;
- case CF_JUMP:
- Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
- break;
- case CF_ELSE:
- Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
- break;
- case CF_POP:
- Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
- break;
- case CF_END:
- if (ST->hasCaymanISA()) {
- Opcode = AMDGPU::CF_END_CM;
- break;
- }
- Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
- break;
- }
- assert (Opcode && "No opcode selected");
- return TII->get(Opcode);
- }
-
- bool isCompatibleWithClause(const MachineInstr *MI,
- std::set<unsigned> &DstRegs) const {
- unsigned DstMI, SrcMI;
- for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
- E = MI->operands_end(); I != E; ++I) {
- const MachineOperand &MO = *I;
- if (!MO.isReg())
- continue;
- if (MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (AMDGPU::R600_Reg128RegClass.contains(Reg))
- DstMI = Reg;
- else
- DstMI = TRI->getMatchingSuperReg(Reg,
- TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
- &AMDGPU::R600_Reg128RegClass);
- }
- if (MO.isUse()) {
- unsigned Reg = MO.getReg();
- if (AMDGPU::R600_Reg128RegClass.contains(Reg))
- SrcMI = Reg;
- else
- SrcMI = TRI->getMatchingSuperReg(Reg,
- TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
- &AMDGPU::R600_Reg128RegClass);
- }
- }
- if ((DstRegs.find(SrcMI) == DstRegs.end())) {
- DstRegs.insert(DstMI);
- return true;
- } else
- return false;
- }
-
- ClauseFile
- MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
- const {
- MachineBasicBlock::iterator ClauseHead = I;
- std::vector<MachineInstr *> ClauseContent;
- unsigned AluInstCount = 0;
- bool IsTex = TII->usesTextureCache(ClauseHead);
- std::set<unsigned> DstRegs;
- for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
- if (IsTrivialInst(I))
- continue;
- if (AluInstCount >= MaxFetchInst)
- break;
- if ((IsTex && !TII->usesTextureCache(I)) ||
- (!IsTex && !TII->usesVertexCache(I)))
- break;
- if (!isCompatibleWithClause(I, DstRegs))
- break;
- AluInstCount ++;
- ClauseContent.push_back(I);
- }
- MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
- getHWInstrDesc(IsTex?CF_TC:CF_VC))
- .addImm(0) // ADDR
- .addImm(AluInstCount - 1); // COUNT
- return ClauseFile(MIb, std::move(ClauseContent));
- }
-
- void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
- static const unsigned LiteralRegs[] = {
- AMDGPU::ALU_LITERAL_X,
- AMDGPU::ALU_LITERAL_Y,
- AMDGPU::ALU_LITERAL_Z,
- AMDGPU::ALU_LITERAL_W
- };
- const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
- TII->getSrcs(MI);
- for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
- if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
- continue;
- int64_t Imm = Srcs[i].second;
- std::vector<int64_t>::iterator It =
- std::find(Lits.begin(), Lits.end(), Imm);
- if (It != Lits.end()) {
- unsigned Index = It - Lits.begin();
- Srcs[i].first->setReg(LiteralRegs[Index]);
- } else {
- assert(Lits.size() < 4 && "Too many literals in Instruction Group");
- Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
- Lits.push_back(Imm);
- }
- }
- }
-
- MachineBasicBlock::iterator insertLiterals(
- MachineBasicBlock::iterator InsertPos,
- const std::vector<unsigned> &Literals) const {
- MachineBasicBlock *MBB = InsertPos->getParent();
- for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
- unsigned LiteralPair0 = Literals[i];
- unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
- InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
- TII->get(AMDGPU::LITERALS))
- .addImm(LiteralPair0)
- .addImm(LiteralPair1);
- }
- return InsertPos;
- }
-
- ClauseFile
- MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
- const {
- MachineBasicBlock::iterator ClauseHead = I;
- std::vector<MachineInstr *> ClauseContent;
- I++;
- for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
- if (IsTrivialInst(I)) {
- ++I;
- continue;
- }
- if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
- break;
- std::vector<int64_t> Literals;
- if (I->isBundle()) {
- MachineInstr *DeleteMI = I;
- MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
- while (++BI != E && BI->isBundledWithPred()) {
- BI->unbundleFromPred();
- for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = BI->getOperand(i);
- if (MO.isReg() && MO.isInternalRead())
- MO.setIsInternalRead(false);
- }
- getLiteral(BI, Literals);
- ClauseContent.push_back(BI);
- }
- I = BI;
- DeleteMI->eraseFromParent();
- } else {
- getLiteral(I, Literals);
- ClauseContent.push_back(I);
- I++;
- }
- for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
- unsigned literal0 = Literals[i];
- unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
- MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
- TII->get(AMDGPU::LITERALS))
- .addImm(literal0)
- .addImm(literal2);
- ClauseContent.push_back(MILit);
- }
- }
- assert(ClauseContent.size() < 128 && "ALU clause is too big");
- ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
- return ClauseFile(ClauseHead, std::move(ClauseContent));
- }
-
- void
- EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
- unsigned &CfCount) {
- CounterPropagateAddr(Clause.first, CfCount);
- MachineBasicBlock *BB = Clause.first->getParent();
- BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
- .addImm(CfCount);
- for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
- BB->splice(InsertPos, BB, Clause.second[i]);
- }
- CfCount += 2 * Clause.second.size();
- }
-
- void
- EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
- unsigned &CfCount) {
- Clause.first->getOperand(0).setImm(0);
- CounterPropagateAddr(Clause.first, CfCount);
- MachineBasicBlock *BB = Clause.first->getParent();
- BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
- .addImm(CfCount);
- for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
- BB->splice(InsertPos, BB, Clause.second[i]);
- }
- CfCount += Clause.second.size();
- }
-
- void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
- MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
- }
- void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
- unsigned Addr) const {
- for (MachineInstr *MI : MIs) {
- CounterPropagateAddr(MI, Addr);
- }
- }
-
-public:
- R600ControlFlowFinalizer(TargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- ST = &MF.getSubtarget<AMDGPUSubtarget>();
- MaxFetchInst = ST->getTexVTXClauseSize();
- TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
- TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
-
- CFStack CFStack(ST, MFI->getShaderType());
- for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
- ++MB) {
- MachineBasicBlock &MBB = *MB;
- unsigned CfCount = 0;
- std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
- std::vector<MachineInstr * > IfThenElseStack;
- if (MFI->getShaderType() == ShaderType::VERTEX) {
- BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
- getHWInstrDesc(CF_CALL_FS));
- CfCount++;
- }
- std::vector<ClauseFile> FetchClauses, AluClauses;
- std::vector<MachineInstr *> LastAlu(1);
- std::vector<MachineInstr *> ToPopAfter;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E;) {
- if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
- DEBUG(dbgs() << CfCount << ":"; I->dump(););
- FetchClauses.push_back(MakeFetchClause(MBB, I));
- CfCount++;
- LastAlu.back() = nullptr;
- continue;
- }
-
- MachineBasicBlock::iterator MI = I;
- if (MI->getOpcode() != AMDGPU::ENDIF)
- LastAlu.back() = nullptr;
- if (MI->getOpcode() == AMDGPU::CF_ALU)
- LastAlu.back() = MI;
- I++;
- bool RequiresWorkAround =
- CFStack.requiresWorkAroundForInst(MI->getOpcode());
- switch (MI->getOpcode()) {
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- if (RequiresWorkAround) {
- DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
- .addImm(CfCount + 1)
- .addImm(1);
- MI->setDesc(TII->get(AMDGPU::CF_ALU));
- CfCount++;
- CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
- } else
- CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
-
- case AMDGPU::CF_ALU:
- I = MI;
- AluClauses.push_back(MakeALUClause(MBB, I));
- DEBUG(dbgs() << CfCount << ":"; MI->dump(););
- CfCount++;
- break;
- case AMDGPU::WHILELOOP: {
- CFStack.pushLoop();
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_WHILE_LOOP))
- .addImm(1);
- std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
- std::set<MachineInstr *>());
- Pair.second.insert(MIb);
- LoopStack.push_back(std::move(Pair));
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::ENDLOOP: {
- CFStack.popLoop();
- std::pair<unsigned, std::set<MachineInstr *> > Pair =
- std::move(LoopStack.back());
- LoopStack.pop_back();
- CounterPropagateAddr(Pair.second, CfCount);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
- .addImm(Pair.first + 1);
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::IF_PREDICATE_SET: {
- LastAlu.push_back(nullptr);
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_JUMP))
- .addImm(0)
- .addImm(0);
- IfThenElseStack.push_back(MIb);
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::ELSE: {
- MachineInstr * JumpInst = IfThenElseStack.back();
- IfThenElseStack.pop_back();
- CounterPropagateAddr(JumpInst, CfCount);
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_ELSE))
- .addImm(0)
- .addImm(0);
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
- IfThenElseStack.push_back(MIb);
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::ENDIF: {
- CFStack.popBranch();
- if (LastAlu.back()) {
- ToPopAfter.push_back(LastAlu.back());
- } else {
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_POP))
- .addImm(CfCount + 1)
- .addImm(1);
- (void)MIb;
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
- CfCount++;
- }
-
- MachineInstr *IfOrElseInst = IfThenElseStack.back();
- IfThenElseStack.pop_back();
- CounterPropagateAddr(IfOrElseInst, CfCount);
- IfOrElseInst->getOperand(1).setImm(1);
- LastAlu.pop_back();
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::BREAK: {
- CfCount ++;
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_LOOP_BREAK))
- .addImm(0);
- LoopStack.back().second.insert(MIb);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::CONTINUE: {
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_LOOP_CONTINUE))
- .addImm(0);
- LoopStack.back().second.insert(MIb);
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::RETURN: {
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
- CfCount++;
- MI->eraseFromParent();
- if (CfCount % 2) {
- BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
- CfCount++;
- }
- for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
- EmitFetchClause(I, FetchClauses[i], CfCount);
- for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
- EmitALUClause(I, AluClauses[i], CfCount);
- }
- default:
- if (TII->isExport(MI->getOpcode())) {
- DEBUG(dbgs() << CfCount << ":"; MI->dump(););
- CfCount++;
- }
- break;
- }
- }
- for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
- MachineInstr *Alu = ToPopAfter[i];
- BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
- TII->get(AMDGPU::CF_ALU_POP_AFTER))
- .addImm(Alu->getOperand(0).getImm())
- .addImm(Alu->getOperand(1).getImm())
- .addImm(Alu->getOperand(2).getImm())
- .addImm(Alu->getOperand(3).getImm())
- .addImm(Alu->getOperand(4).getImm())
- .addImm(Alu->getOperand(5).getImm())
- .addImm(Alu->getOperand(6).getImm())
- .addImm(Alu->getOperand(7).getImm())
- .addImm(Alu->getOperand(8).getImm());
- Alu->eraseFromParent();
- }
- MFI->StackSize = CFStack.MaxStackSize;
- }
-
- return false;
- }
-
- const char *getPassName() const override {
- return "R600 Control Flow Finalizer Pass";
- }
-};
-
-char R600ControlFlowFinalizer::ID = 0;
-
-} // end anonymous namespace
-
-
-llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
- return new R600ControlFlowFinalizer(TM);
-}
OpenPOWER on IntegriCloud