diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 88 |
1 files changed, 51 insertions, 37 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 90a37f1..821aada 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -91,7 +91,8 @@ private: bool isOpRelevant(MachineOperand &Op); /// \brief Get register interval an operand affects. - RegInterval getRegInterval(MachineOperand &Op); + RegInterval getRegInterval(const TargetRegisterClass *RC, + const MachineOperand &Reg) const; /// \brief Handle instructions async components void pushInstruction(MachineBasicBlock &MBB, @@ -121,9 +122,13 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "SI insert wait instructions"; + return "SI insert wait instructions"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } // End anonymous namespace @@ -138,9 +143,8 @@ FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { } Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { - - uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - Counters Result; + uint64_t TSFlags = MI.getDesc().TSFlags; + Counters Result = { { 0, 0, 0 } }; Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); @@ -151,15 +155,22 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // LGKM may uses larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { - if (TII->isSMRD(MI.getOpcode())) { - - MachineOperand &Op = MI.getOperand(0); - assert(Op.isReg() && "First LGKM operand must be a register!"); - - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; - + if (TII->isSMRD(MI)) { + + if (MI.getNumOperands() != 0) { + assert(MI.getOperand(0).isReg() && + "First LGKM operand must be a register!"); + + // XXX - What if this is a write into a super register? + const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0); + unsigned Size = RC->getSize(); + Result.Named.LGKM = Size > 4 ? 2 : 1; + } else { + // s_dcache_inv etc. do not have a a destination register. Assume we + // want a wait on these. + // XXX - What is the right value? + Result.Named.LGKM = 1; + } } else { // DS Result.Named.LGKM = 1; @@ -173,9 +184,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { } bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { - // Constants are always irrelevant - if (!Op.isReg()) + if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) return false; // Defines are always relevant @@ -196,7 +206,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { // operand comes before the value operand and it may have // multiple data operands. - if (TII->isDS(MI.getOpcode())) { + if (TII->isDS(MI)) { MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data); if (Data && Op.isIdenticalTo(*Data)) return true; @@ -224,18 +234,13 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { return false; } -RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { - - if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) - return std::make_pair(0, 0); - - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - +RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC, + const MachineOperand &Reg) const { + unsigned Size = RC->getSize(); assert(Size >= 4); RegInterval Result; - Result.first = TRI->getEncodingValue(Reg); + Result.first = TRI->getEncodingValue(Reg.getReg()); Result.second = Result.first + Size / 4; return Result; @@ -246,10 +251,13 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // Get the hardware counter increments and sum them up Counters Increment = getHwCounts(*I); + Counters Limit = ZeroCounts; unsigned Sum = 0; for (unsigned i = 0; i < 3; ++i) { LastIssued.Array[i] += Increment.Array[i]; + if (Increment.Array[i]) + Limit.Array[i] = LastIssued.Array[i]; Sum += Increment.Array[i]; } @@ -261,7 +269,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM + // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM // or SMEM clause, respectively. // // The temporary workaround is to break the clauses with S_NOP. @@ -270,7 +278,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // and destination registers don't overlap, e.g. this is illegal: // r0 = load r2 // r2 = load r0 - if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) || + if ((LastOpcodeType == SMEM && TII->isSMRD(*I)) || (LastOpcodeType == VMEM && Increment.Named.VM)) { // Insert a NOP to break the clause. BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)) @@ -278,7 +286,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, LastInstWritesM0 = false; } - if (TII->isSMRD(I->getOpcode())) + if (TII->isSMRD(*I)) LastOpcodeType = SMEM; else if (Increment.Named.VM) LastOpcodeType = VMEM; @@ -290,21 +298,21 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, } for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &Op = I->getOperand(i); if (!isOpRelevant(Op)) continue; - RegInterval Interval = getRegInterval(Op); + const TargetRegisterClass *RC = TII->getOpRegClass(*I, i); + RegInterval Interval = getRegInterval(RC, Op); for (unsigned j = Interval.first; j < Interval.second; ++j) { // Remember which registers we define if (Op.isDef()) - DefinedRegs[j] = LastIssued; + DefinedRegs[j] = Limit; // and which one we are using if (Op.isUse()) - UsedRegs[j] = LastIssued; + UsedRegs[j] = Limit; } } } @@ -390,12 +398,18 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { if (MI.getOpcode() == AMDGPU::S_SENDMSG) return LastIssued; - // For each register affected by this - // instruction increase the result sequence + // For each register affected by this instruction increase the result + // sequence. + // + // TODO: We could probably just look at explicit operands if we removed VCC / + // EXEC from SMRD dest reg classes. for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &Op = MI.getOperand(i); - RegInterval Interval = getRegInterval(Op); + if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) + continue; + + const TargetRegisterClass *RC = TII->getOpRegClass(MI, i); + RegInterval Interval = getRegInterval(RC, Op); for (unsigned j = Interval.first; j < Interval.second; ++j) { if (Op.isDef()) { |