summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp')
-rw-r--r--contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp152
1 files changed, 90 insertions, 62 deletions
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
index ffe3414..ac3d8f6 100644
--- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -14,8 +14,6 @@
#define DEBUG_TYPE "r600cf"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
@@ -24,8 +22,11 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
-namespace llvm {
+namespace {
class R600ControlFlowFinalizer : public MachineFunctionPass {
@@ -48,7 +49,7 @@ private:
static char ID;
const R600InstrInfo *TII;
- const R600RegisterInfo &TRI;
+ const R600RegisterInfo *TRI;
unsigned MaxFetchInst;
const AMDGPUSubtarget &ST;
@@ -64,7 +65,7 @@ private:
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
unsigned Opcode = 0;
- bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX);
+ bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
@@ -97,7 +98,7 @@ private:
Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
break;
case CF_END:
- if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) {
+ if (ST.hasCaymanISA()) {
Opcode = AMDGPU::CF_END_CM;
break;
}
@@ -109,28 +110,33 @@ private:
}
bool isCompatibleWithClause(const MachineInstr *MI,
- std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const {
+ std::set<unsigned> &DstRegs) const {
unsigned DstMI, SrcMI;
for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
E = MI->operands_end(); I != E; ++I) {
const MachineOperand &MO = *I;
if (!MO.isReg())
continue;
- if (MO.isDef())
- DstMI = MO.getReg();
+ if (MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+ DstMI = Reg;
+ else
+ DstMI = TRI->getMatchingSuperReg(Reg,
+ TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
+ &AMDGPU::R600_Reg128RegClass);
+ }
if (MO.isUse()) {
unsigned Reg = MO.getReg();
if (AMDGPU::R600_Reg128RegClass.contains(Reg))
SrcMI = Reg;
else
- SrcMI = TRI.getMatchingSuperReg(Reg,
- TRI.getSubRegFromChannel(TRI.getHWRegChan(Reg)),
+ SrcMI = TRI->getMatchingSuperReg(Reg,
+ TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&AMDGPU::R600_Reg128RegClass);
}
}
- if ((DstRegs.find(SrcMI) == DstRegs.end()) &&
- (SrcRegs.find(DstMI) == SrcRegs.end())) {
- SrcRegs.insert(SrcMI);
+ if ((DstRegs.find(SrcMI) == DstRegs.end())) {
DstRegs.insert(DstMI);
return true;
} else
@@ -144,16 +150,16 @@ private:
std::vector<MachineInstr *> ClauseContent;
unsigned AluInstCount = 0;
bool IsTex = TII->usesTextureCache(ClauseHead);
- std::set<unsigned> DstRegs, SrcRegs;
+ std::set<unsigned> DstRegs;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
if (IsTrivialInst(I))
continue;
- if (AluInstCount > MaxFetchInst)
+ if (AluInstCount >= MaxFetchInst)
break;
if ((IsTex && !TII->usesTextureCache(I)) ||
(!IsTex && !TII->usesVertexCache(I)))
break;
- if (!isCompatibleWithClause(I, DstRegs, SrcRegs))
+ if (!isCompatibleWithClause(I, DstRegs))
break;
AluInstCount ++;
ClauseContent.push_back(I);
@@ -166,28 +172,26 @@ private:
}
void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
- unsigned LiteralRegs[] = {
+ static const unsigned LiteralRegs[] = {
AMDGPU::ALU_LITERAL_X,
AMDGPU::ALU_LITERAL_Y,
AMDGPU::ALU_LITERAL_Z,
AMDGPU::ALU_LITERAL_W
};
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.getReg() != AMDGPU::ALU_LITERAL_X)
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
+ TII->getSrcs(MI);
+ for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
+ if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
continue;
- unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM);
- int64_t Imm = MI->getOperand(ImmIdx).getImm();
+ int64_t Imm = Srcs[i].second;
std::vector<int64_t>::iterator It =
std::find(Lits.begin(), Lits.end(), Imm);
if (It != Lits.end()) {
unsigned Index = It - Lits.begin();
- MO.setReg(LiteralRegs[Index]);
+ Srcs[i].first->setReg(LiteralRegs[Index]);
} else {
assert(Lits.size() < 4 && "Too many literals in Instruction Group");
- MO.setReg(LiteralRegs[Lits.size()]);
+ Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
Lits.push_back(Imm);
}
}
@@ -252,6 +256,7 @@ private:
ClauseContent.push_back(MILit);
}
}
+ assert(ClauseContent.size() < 128 && "ALU clause is too big");
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
return ClauseFile(ClauseHead, ClauseContent);
}
@@ -272,6 +277,7 @@ private:
void
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
+ Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
@@ -295,34 +301,35 @@ private:
}
unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
- switch (ST.device()->getGeneration()) {
- case AMDGPUDeviceInfo::HD4XXX:
+ switch (ST.getGeneration()) {
+ case AMDGPUSubtarget::R600:
+ case AMDGPUSubtarget::R700:
if (hasPush)
StackSubEntry += 2;
break;
- case AMDGPUDeviceInfo::HD5XXX:
+ case AMDGPUSubtarget::EVERGREEN:
if (hasPush)
StackSubEntry ++;
- case AMDGPUDeviceInfo::HD6XXX:
+ case AMDGPUSubtarget::NORTHERN_ISLANDS:
StackSubEntry += 2;
break;
+ default: llvm_unreachable("Not a VLIW4/VLIW5 GPU");
}
return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
}
public:
R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
- TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
- TRI(TII->getRegisterInfo()),
+ TII (0), TRI(0),
ST(tm.getSubtarget<AMDGPUSubtarget>()) {
const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
- if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
- MaxFetchInst = 8;
- else
- MaxFetchInst = 16;
+ MaxFetchInst = ST.getTexVTXClauseSize();
}
virtual bool runOnMachineFunction(MachineFunction &MF) {
+ TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());
+
unsigned MaxStack = 0;
unsigned CurrentStack = 0;
bool HasPush = false;
@@ -340,6 +347,9 @@ public:
MaxStack = 1;
}
std::vector<ClauseFile> FetchClauses, AluClauses;
+ std::vector<MachineInstr *> LastAlu(1);
+ std::vector<MachineInstr *> ToPopAfter;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
@@ -350,6 +360,10 @@ public:
}
MachineBasicBlock::iterator MI = I;
+ if (MI->getOpcode() != AMDGPU::ENDIF)
+ LastAlu.back() = 0;
+ if (MI->getOpcode() == AMDGPU::CF_ALU)
+ LastAlu.back() = MI;
I++;
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
@@ -359,12 +373,6 @@ public:
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
- case AMDGPU::EG_ExportBuf:
- case AMDGPU::EG_ExportSwz:
- case AMDGPU::R600_ExportBuf:
- case AMDGPU::R600_ExportSwz:
- case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
@@ -395,6 +403,7 @@ public:
break;
}
case AMDGPU::IF_PREDICATE_SET: {
+ LastAlu.push_back(0);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
.addImm(0)
@@ -412,7 +421,7 @@ public:
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
- .addImm(1);
+ .addImm(0);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
IfThenElseStack.push_back(MIb);
MI->eraseFromParent();
@@ -421,31 +430,31 @@ public:
}
case AMDGPU::ENDIF: {
CurrentStack--;
+ if (LastAlu.back()) {
+ ToPopAfter.push_back(LastAlu.back());
+ } else {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ (void)MIb;
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CfCount++;
+ }
+
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
- CounterPropagateAddr(IfOrElseInst, CfCount + 1);
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_POP))
- .addImm(CfCount + 1)
- .addImm(1);
- (void)MIb;
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CounterPropagateAddr(IfOrElseInst, CfCount);
+ IfOrElseInst->getOperand(1).setImm(1);
+ LastAlu.pop_back();
MI->eraseFromParent();
- CfCount++;
break;
}
- case AMDGPU::PREDICATED_BREAK: {
- CurrentStack--;
- CfCount += 3;
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
- .addImm(CfCount)
- .addImm(1);
+ case AMDGPU::BREAK: {
+ CfCount ++;
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_BREAK))
.addImm(0);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
- .addImm(CfCount)
- .addImm(1);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();
break;
@@ -473,9 +482,28 @@ public:
EmitALUClause(I, AluClauses[i], CfCount);
}
default:
+ if (TII->isExport(MI->getOpcode())) {
+ DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ CfCount++;
+ }
break;
}
}
+ for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
+ MachineInstr *Alu = ToPopAfter[i];
+ BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
+ TII->get(AMDGPU::CF_ALU_POP_AFTER))
+ .addImm(Alu->getOperand(0).getImm())
+ .addImm(Alu->getOperand(1).getImm())
+ .addImm(Alu->getOperand(2).getImm())
+ .addImm(Alu->getOperand(3).getImm())
+ .addImm(Alu->getOperand(4).getImm())
+ .addImm(Alu->getOperand(5).getImm())
+ .addImm(Alu->getOperand(6).getImm())
+ .addImm(Alu->getOperand(7).getImm())
+ .addImm(Alu->getOperand(8).getImm());
+ Alu->eraseFromParent();
+ }
MFI->StackSize = getHWStackSize(MaxStack, HasPush);
}
@@ -489,7 +517,7 @@ public:
char R600ControlFlowFinalizer::ID = 0;
-}
+} // end anonymous namespace
llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
OpenPOWER on IntegriCloud