diff options
Diffstat (limited to 'contrib/llvm/lib/Target/R600/R600Packetizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/R600/R600Packetizer.cpp | 459 |
1 files changed, 459 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp new file mode 100644 index 0000000..cd7b7d0 --- /dev/null +++ b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp @@ -0,0 +1,459 @@ +//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass implements instructions packetization for R600. It unsets isLast +/// bit of instructions inside a bundle and substitutes src register with +/// PreviousVector when applicable. +// +//===----------------------------------------------------------------------===// + +#ifndef R600PACKETIZER_CPP +#define R600PACKETIZER_CPP + +#define DEBUG_TYPE "packets" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "AMDGPU.h" +#include "R600InstrInfo.h" + +namespace llvm { + +class R600Packetizer : public MachineFunctionPass { + +public: + static char ID; + R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "R600 Packetizer"; + } + + bool runOnMachineFunction(MachineFunction &Fn); +}; +char R600Packetizer::ID = 0; + +class R600PacketizerList : public VLIWPacketizerList { + +private: + const R600InstrInfo *TII; + const R600RegisterInfo &TRI; + + enum BankSwizzle { + ALU_VEC_012 = 0, + ALU_VEC_021, + ALU_VEC_120, + ALU_VEC_102, + ALU_VEC_201, + ALU_VEC_210 + }; + + unsigned getSlot(const MachineInstr *MI) const { + return TRI.getHWRegChan(MI->getOperand(0).getReg()); + } + + /// \returns register to PV chan mapping for bundle/single instructions that + /// immediatly precedes I. + DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) + const { + DenseMap<unsigned, unsigned> Result; + I--; + if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) + return Result; + MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); + if (I->isBundle()) + BI++; + do { + if (TII->isPredicated(BI)) + continue; + if (TII->isTransOnly(BI)) + continue; + int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE); + if (OperandIdx < 0) + continue; + if (BI->getOperand(OperandIdx).getImm() == 0) + continue; + unsigned Dst = BI->getOperand(0).getReg(); + if (BI->getOpcode() == AMDGPU::DOT4_r600_real) { + Result[Dst] = AMDGPU::PV_X; + continue; + } + unsigned PVReg = 0; + switch (TRI.getHWRegChan(Dst)) { + case 0: + PVReg = AMDGPU::PV_X; + break; + case 1: + PVReg = AMDGPU::PV_Y; + break; + case 2: + PVReg = AMDGPU::PV_Z; + break; + case 3: + PVReg = AMDGPU::PV_W; + break; + default: + llvm_unreachable("Invalid Chan"); + } + Result[Dst] = PVReg; + } while ((++BI)->isBundledWithPred()); + return Result; + } + + void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs) + const { + R600Operands::Ops Ops[] = { + R600Operands::SRC0, + R600Operands::SRC1, + R600Operands::SRC2 + }; + for (unsigned i = 0; i < 3; i++) { + int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); + if (OperandIdx < 0) + continue; + unsigned Src = MI->getOperand(OperandIdx).getReg(); + const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); + if (It != PVs.end()) + MI->getOperand(OperandIdx).setReg(It->second); + } + } +public: + // Ctor. + R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, + MachineDominatorTree &MDT) + : VLIWPacketizerList(MF, MLI, MDT, true), + TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())), + TRI(TII->getRegisterInfo()) { } + + // initPacketizerState - initialize some internal flags. + void initPacketizerState() { } + + // ignorePseudoInstruction - Ignore bundling of pseudo instructions. + bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) { + return false; + } + + // isSoloInstruction - return true if instruction MI can not be packetized + // with any other instruction, which means that MI itself is a packet. + bool isSoloInstruction(MachineInstr *MI) { + if (TII->isVector(*MI)) + return true; + if (!TII->isALUInstr(MI->getOpcode())) + return true; + if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY) + return true; + if (TII->isTransOnly(MI)) + return true; + return false; + } + + // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ + // together. + bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { + MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); + if (getSlot(MII) <= getSlot(MIJ)) + return false; + // Does MII and MIJ share the same pred_sel ? + int OpI = TII->getOperandIdx(MII->getOpcode(), R600Operands::PRED_SEL), + OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600Operands::PRED_SEL); + unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0, + PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0; + if (PredI != PredJ) + return false; + if (SUJ->isSucc(SUI)) { + for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) { + const SDep &Dep = SUJ->Succs[i]; + if (Dep.getSUnit() != SUI) + continue; + if (Dep.getKind() == SDep::Anti) + continue; + if (Dep.getKind() == SDep::Output) + if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) + continue; + return false; + } + } + return true; + } + + // isLegalToPruneDependencies - Is it legal to prune dependece between SUI + // and SUJ. + bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;} + + void setIsLastBit(MachineInstr *MI, unsigned Bit) const { + unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600Operands::LAST); + MI->getOperand(LastOp).setImm(Bit); + } + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + CurrentPacketMIs.push_back(MI); + bool FitsConstLimits = TII->canBundle(CurrentPacketMIs); + DEBUG( + if (!FitsConstLimits) { + dbgs() << "Couldn't pack :\n"; + MI->dump(); + dbgs() << "with the following packets :\n"; + for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { + CurrentPacketMIs[i]->dump(); + dbgs() << "\n"; + } + dbgs() << "because of Consts read limitations\n"; + }); + const DenseMap<unsigned, unsigned> &PV = + getPreviousVector(CurrentPacketMIs.front()); + bool FitsReadPortLimits = fitsReadPortLimitation(CurrentPacketMIs, PV); + DEBUG( + if (!FitsReadPortLimits) { + dbgs() << "Couldn't pack :\n"; + MI->dump(); + dbgs() << "with the following packets :\n"; + for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { + CurrentPacketMIs[i]->dump(); + dbgs() << "\n"; + } + dbgs() << "because of Read port limitations\n"; + }); + bool isBundlable = FitsConstLimits && FitsReadPortLimits; + CurrentPacketMIs.pop_back(); + if (!isBundlable) { + endPacket(MI->getParent(), MI); + substitutePV(MI, getPreviousVector(MI)); + return VLIWPacketizerList::addToPacket(MI); + } + if (!CurrentPacketMIs.empty()) + setIsLastBit(CurrentPacketMIs.back(), 0); + substitutePV(MI, PV); + return VLIWPacketizerList::addToPacket(MI); + } +private: + std::vector<std::pair<int, unsigned> > + ExtractSrcs(const MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV) + const { + R600Operands::Ops Ops[] = { + R600Operands::SRC0, + R600Operands::SRC1, + R600Operands::SRC2 + }; + std::vector<std::pair<int, unsigned> > Result; + for (unsigned i = 0; i < 3; i++) { + int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); + if (OperandIdx < 0){ + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Src = MI->getOperand(OperandIdx).getReg(); + if (PV.find(Src) != PV.end()) { + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Reg = TRI.getEncodingValue(Src) & 0xff; + if (Reg > 127) { + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Chan = TRI.getHWRegChan(Src); + Result.push_back(std::pair<int, unsigned>(Reg, Chan)); + } + return Result; + } + + std::vector<std::pair<int, unsigned> > + Swizzle(std::vector<std::pair<int, unsigned> > Src, + BankSwizzle Swz) const { + switch (Swz) { + case ALU_VEC_012: + break; + case ALU_VEC_021: + std::swap(Src[1], Src[2]); + break; + case ALU_VEC_102: + std::swap(Src[0], Src[1]); + break; + case ALU_VEC_120: + std::swap(Src[0], Src[1]); + std::swap(Src[0], Src[2]); + break; + case ALU_VEC_201: + std::swap(Src[0], Src[2]); + std::swap(Src[0], Src[1]); + break; + case ALU_VEC_210: + std::swap(Src[0], Src[2]); + break; + } + return Src; + } + + bool isLegal(const std::vector<MachineInstr *> &IG, + const std::vector<BankSwizzle> &Swz, + const DenseMap<unsigned, unsigned> &PV) const { + assert (Swz.size() == IG.size()); + int Vector[4][3]; + memset(Vector, -1, sizeof(Vector)); + for (unsigned i = 0, e = IG.size(); i < e; i++) { + const std::vector<std::pair<int, unsigned> > &Srcs = + Swizzle(ExtractSrcs(IG[i], PV), Swz[i]); + for (unsigned j = 0; j < 3; j++) { + const std::pair<int, unsigned> &Src = Srcs[j]; + if (Src.first < 0) + continue; + if (Vector[Src.second][j] < 0) + Vector[Src.second][j] = Src.first; + if (Vector[Src.second][j] != Src.first) + return false; + } + } + return true; + } + + bool recursiveFitsFPLimitation( + std::vector<MachineInstr *> IG, + const DenseMap<unsigned, unsigned> &PV, + std::vector<BankSwizzle> &SwzCandidate, + std::vector<MachineInstr *> CurrentlyChecked) + const { + if (!isLegal(CurrentlyChecked, SwzCandidate, PV)) + return false; + if (IG.size() == CurrentlyChecked.size()) { + return true; + } + BankSwizzle AvailableSwizzle[] = { + ALU_VEC_012, + ALU_VEC_021, + ALU_VEC_120, + ALU_VEC_102, + ALU_VEC_201, + ALU_VEC_210 + }; + CurrentlyChecked.push_back(IG[CurrentlyChecked.size()]); + for (unsigned i = 0; i < 6; i++) { + SwzCandidate.push_back(AvailableSwizzle[i]); + if (recursiveFitsFPLimitation(IG, PV, SwzCandidate, CurrentlyChecked)) + return true; + SwzCandidate.pop_back(); + } + return false; + } + + bool fitsReadPortLimitation( + std::vector<MachineInstr *> IG, + const DenseMap<unsigned, unsigned> &PV) + const { + //Todo : support shared src0 - src1 operand + std::vector<BankSwizzle> SwzCandidate; + bool Result = recursiveFitsFPLimitation(IG, PV, SwzCandidate, + std::vector<MachineInstr *>()); + if (!Result) + return false; + for (unsigned i = 0, e = IG.size(); i < e; i++) { + MachineInstr *MI = IG[i]; + unsigned Op = TII->getOperandIdx(MI->getOpcode(), + R600Operands::BANK_SWIZZLE); + MI->getOperand(Op).setImm(SwzCandidate[i]); + } + return true; + } +}; + +bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { + const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + + // Instantiate the packetizer. + R600PacketizerList Packetizer(Fn, MLI, MDT); + + // DFA state table should not be empty. + assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + + // + // Loop over all basic blocks and remove KILL pseudo-instructions + // These instructions confuse the dependence analysis. Consider: + // D0 = ... (Insn 0) + // R0 = KILL R0, D0 (Insn 1) + // R0 = ... (Insn 2) + // Here, Insn 1 will result in the dependence graph not emitting an output + // dependence between Insn 0 and Insn 2. This can lead to incorrect + // packetization + // + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + MachineBasicBlock::iterator End = MBB->end(); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != End) { + if (MI->isKill()) { + MachineBasicBlock::iterator DeleteMI = MI; + ++MI; + MBB->erase(DeleteMI); + End = MBB->end(); + continue; + } + ++MI; + } + } + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + // Find scheduling regions and schedule / packetize each region. + unsigned RemainingCount = MBB->size(); + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin();) { + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + MachineBasicBlock::iterator I = RegionEnd; + for(;I != MBB->begin(); --I, --RemainingCount) { + if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn)) + break; + } + I = MBB->begin(); + + // Skip empty scheduling regions. + if (I == RegionEnd) { + RegionEnd = llvm::prior(RegionEnd); + --RemainingCount; + continue; + } + // Skip regions with one instruction. + if (I == llvm::prior(RegionEnd)) { + RegionEnd = llvm::prior(RegionEnd); + continue; + } + + Packetizer.PacketizeMIs(MBB, I, RegionEnd); + RegionEnd = I; + } + } + + return true; + +} + +} + +llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) { + return new R600Packetizer(tm); +} + +#endif // R600PACKETIZER_CPP |