diff options
Diffstat (limited to 'contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp | 152 |
1 files changed, 90 insertions, 62 deletions
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp index ffe3414..ac3d8f6 100644 --- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -14,8 +14,6 @@ #define DEBUG_TYPE "r600cf" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - #include "AMDGPU.h" #include "R600Defines.h" #include "R600InstrInfo.h" @@ -24,8 +22,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; -namespace llvm { +namespace { class R600ControlFlowFinalizer : public MachineFunctionPass { @@ -48,7 +49,7 @@ private: static char ID; const R600InstrInfo *TII; - const R600RegisterInfo &TRI; + const R600RegisterInfo *TRI; unsigned MaxFetchInst; const AMDGPUSubtarget &ST; @@ -64,7 +65,7 @@ private: const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { unsigned Opcode = 0; - bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX); + bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN); switch (CFI) { case CF_TC: Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; @@ -97,7 +98,7 @@ private: Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; break; case CF_END: - if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) { + if (ST.hasCaymanISA()) { Opcode = AMDGPU::CF_END_CM; break; } @@ -109,28 +110,33 @@ private: } bool isCompatibleWithClause(const MachineInstr *MI, - std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const { + std::set<unsigned> &DstRegs) const { unsigned DstMI, SrcMI; for (MachineInstr::const_mop_iterator I = MI->operands_begin(), E = MI->operands_end(); I != E; ++I) { const MachineOperand &MO = *I; if (!MO.isReg()) continue; - if (MO.isDef()) - DstMI = MO.getReg(); + if (MO.isDef()) { + unsigned Reg = MO.getReg(); + if (AMDGPU::R600_Reg128RegClass.contains(Reg)) + DstMI = Reg; + else + DstMI = TRI->getMatchingSuperReg(Reg, + TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)), + &AMDGPU::R600_Reg128RegClass); + } if (MO.isUse()) { unsigned Reg = MO.getReg(); if (AMDGPU::R600_Reg128RegClass.contains(Reg)) SrcMI = Reg; else - SrcMI = TRI.getMatchingSuperReg(Reg, - TRI.getSubRegFromChannel(TRI.getHWRegChan(Reg)), + SrcMI = TRI->getMatchingSuperReg(Reg, + TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)), &AMDGPU::R600_Reg128RegClass); } } - if ((DstRegs.find(SrcMI) == DstRegs.end()) && - (SrcRegs.find(DstMI) == SrcRegs.end())) { - SrcRegs.insert(SrcMI); + if ((DstRegs.find(SrcMI) == DstRegs.end())) { DstRegs.insert(DstMI); return true; } else @@ -144,16 +150,16 @@ private: std::vector<MachineInstr *> ClauseContent; unsigned AluInstCount = 0; bool IsTex = TII->usesTextureCache(ClauseHead); - std::set<unsigned> DstRegs, SrcRegs; + std::set<unsigned> DstRegs; for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { if (IsTrivialInst(I)) continue; - if (AluInstCount > MaxFetchInst) + if (AluInstCount >= MaxFetchInst) break; if ((IsTex && !TII->usesTextureCache(I)) || (!IsTex && !TII->usesVertexCache(I))) break; - if (!isCompatibleWithClause(I, DstRegs, SrcRegs)) + if (!isCompatibleWithClause(I, DstRegs)) break; AluInstCount ++; ClauseContent.push_back(I); @@ -166,28 +172,26 @@ private: } void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const { - unsigned LiteralRegs[] = { + static const unsigned LiteralRegs[] = { AMDGPU::ALU_LITERAL_X, AMDGPU::ALU_LITERAL_Y, AMDGPU::ALU_LITERAL_Z, AMDGPU::ALU_LITERAL_W }; - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.getReg() != AMDGPU::ALU_LITERAL_X) + const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs = + TII->getSrcs(MI); + for (unsigned i = 0, e = Srcs.size(); i < e; ++i) { + if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X) continue; - unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM); - int64_t Imm = MI->getOperand(ImmIdx).getImm(); + int64_t Imm = Srcs[i].second; std::vector<int64_t>::iterator It = std::find(Lits.begin(), Lits.end(), Imm); if (It != Lits.end()) { unsigned Index = It - Lits.begin(); - MO.setReg(LiteralRegs[Index]); + Srcs[i].first->setReg(LiteralRegs[Index]); } else { assert(Lits.size() < 4 && "Too many literals in Instruction Group"); - MO.setReg(LiteralRegs[Lits.size()]); + Srcs[i].first->setReg(LiteralRegs[Lits.size()]); Lits.push_back(Imm); } } @@ -252,6 +256,7 @@ private: ClauseContent.push_back(MILit); } } + assert(ClauseContent.size() < 128 && "ALU clause is too big"); ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1); return ClauseFile(ClauseHead, ClauseContent); } @@ -272,6 +277,7 @@ private: void EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, unsigned &CfCount) { + Clause.first->getOperand(0).setImm(0); CounterPropagateAddr(Clause.first, CfCount); MachineBasicBlock *BB = Clause.first->getParent(); BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE)) @@ -295,34 +301,35 @@ private: } unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const { - switch (ST.device()->getGeneration()) { - case AMDGPUDeviceInfo::HD4XXX: + switch (ST.getGeneration()) { + case AMDGPUSubtarget::R600: + case AMDGPUSubtarget::R700: if (hasPush) StackSubEntry += 2; break; - case AMDGPUDeviceInfo::HD5XXX: + case AMDGPUSubtarget::EVERGREEN: if (hasPush) StackSubEntry ++; - case AMDGPUDeviceInfo::HD6XXX: + case AMDGPUSubtarget::NORTHERN_ISLANDS: StackSubEntry += 2; break; + default: llvm_unreachable("Not a VLIW4/VLIW5 GPU"); } return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4 } public: R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())), - TRI(TII->getRegisterInfo()), + TII (0), TRI(0), ST(tm.getSubtarget<AMDGPUSubtarget>()) { const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); - if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) - MaxFetchInst = 8; - else - MaxFetchInst = 16; + MaxFetchInst = ST.getTexVTXClauseSize(); } virtual bool runOnMachineFunction(MachineFunction &MF) { + TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); + TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo()); + unsigned MaxStack = 0; unsigned CurrentStack = 0; bool HasPush = false; @@ -340,6 +347,9 @@ public: MaxStack = 1; } std::vector<ClauseFile> FetchClauses, AluClauses; + std::vector<MachineInstr *> LastAlu(1); + std::vector<MachineInstr *> ToPopAfter; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { @@ -350,6 +360,10 @@ public: } MachineBasicBlock::iterator MI = I; + if (MI->getOpcode() != AMDGPU::ENDIF) + LastAlu.back() = 0; + if (MI->getOpcode() == AMDGPU::CF_ALU) + LastAlu.back() = MI; I++; switch (MI->getOpcode()) { case AMDGPU::CF_ALU_PUSH_BEFORE: @@ -359,12 +373,6 @@ public: case AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); - case AMDGPU::EG_ExportBuf: - case AMDGPU::EG_ExportSwz: - case AMDGPU::R600_ExportBuf: - case AMDGPU::R600_ExportSwz: - case AMDGPU::RAT_WRITE_CACHELESS_32_eg: - case AMDGPU::RAT_WRITE_CACHELESS_128_eg: DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; @@ -395,6 +403,7 @@ public: break; } case AMDGPU::IF_PREDICATE_SET: { + LastAlu.push_back(0); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) .addImm(0) @@ -412,7 +421,7 @@ public: MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_ELSE)) .addImm(0) - .addImm(1); + .addImm(0); DEBUG(dbgs() << CfCount << ":"; MIb->dump();); IfThenElseStack.push_back(MIb); MI->eraseFromParent(); @@ -421,31 +430,31 @@ public: } case AMDGPU::ENDIF: { CurrentStack--; + if (LastAlu.back()) { + ToPopAfter.push_back(LastAlu.back()); + } else { + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + getHWInstrDesc(CF_POP)) + .addImm(CfCount + 1) + .addImm(1); + (void)MIb; + DEBUG(dbgs() << CfCount << ":"; MIb->dump();); + CfCount++; + } + MachineInstr *IfOrElseInst = IfThenElseStack.back(); IfThenElseStack.pop_back(); - CounterPropagateAddr(IfOrElseInst, CfCount + 1); - MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - getHWInstrDesc(CF_POP)) - .addImm(CfCount + 1) - .addImm(1); - (void)MIb; - DEBUG(dbgs() << CfCount << ":"; MIb->dump();); + CounterPropagateAddr(IfOrElseInst, CfCount); + IfOrElseInst->getOperand(1).setImm(1); + LastAlu.pop_back(); MI->eraseFromParent(); - CfCount++; break; } - case AMDGPU::PREDICATED_BREAK: { - CurrentStack--; - CfCount += 3; - BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) - .addImm(CfCount) - .addImm(1); + case AMDGPU::BREAK: { + CfCount ++; MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_LOOP_BREAK)) .addImm(0); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP)) - .addImm(CfCount) - .addImm(1); LoopStack.back().second.insert(MIb); MI->eraseFromParent(); break; @@ -473,9 +482,28 @@ public: EmitALUClause(I, AluClauses[i], CfCount); } default: + if (TII->isExport(MI->getOpcode())) { + DEBUG(dbgs() << CfCount << ":"; MI->dump();); + CfCount++; + } break; } } + for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { + MachineInstr *Alu = ToPopAfter[i]; + BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), + TII->get(AMDGPU::CF_ALU_POP_AFTER)) + .addImm(Alu->getOperand(0).getImm()) + .addImm(Alu->getOperand(1).getImm()) + .addImm(Alu->getOperand(2).getImm()) + .addImm(Alu->getOperand(3).getImm()) + .addImm(Alu->getOperand(4).getImm()) + .addImm(Alu->getOperand(5).getImm()) + .addImm(Alu->getOperand(6).getImm()) + .addImm(Alu->getOperand(7).getImm()) + .addImm(Alu->getOperand(8).getImm()); + Alu->eraseFromParent(); + } MFI->StackSize = getHWStackSize(MaxStack, HasPush); } @@ -489,7 +517,7 @@ public: char R600ControlFlowFinalizer::ID = 0; -} +} // end anonymous namespace llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { |