diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp new file mode 100644 index 0000000..2f88033 --- /dev/null +++ b/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -0,0 +1,312 @@ +//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This contains a MachineSchedStrategy implementation for maximizing wave +/// occupancy on GCN hardware. +//===----------------------------------------------------------------------===// + +#include "GCNSchedStrategy.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" + +#define DEBUG_TYPE "misched" + +using namespace llvm; + +GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( + const MachineSchedContext *C) : + GenericScheduler(C) { } + +static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs, + const MachineFunction &MF) { + + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs), + ST.getOccupancyWithNumVGPRs(VGPRs)); + return std::min(MinRegOccupancy, + ST.getOccupancyWithLocalMemSize(MFI->getLDSSize())); +} + +void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, + bool AtTop, const RegPressureTracker &RPTracker, + const SIRegisterInfo *SRI, + int SGPRPressure, + int VGPRPressure, + int SGPRExcessLimit, + int VGPRExcessLimit, + int SGPRCriticalLimit, + int VGPRCriticalLimit) { + + Cand.SU = SU; + Cand.AtTop = AtTop; + + // getDownwardPressure() and getUpwardPressure() make temporary changes to + // the the tracker, so we need to pass those function a non-const copy. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + std::vector<unsigned> Pressure; + std::vector<unsigned> MaxPressure; + + if (AtTop) + TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); + else { + // FIXME: I think for bottom up scheduling, the register pressure is cached + // and can be retrieved by DAG->getPressureDif(SU). + TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); + } + + int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()]; + int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()]; + + // If two instructions increase the pressure of different register sets + // by the same amount, the generic scheduler will prefer to schedule the + // instruction that increases the set with the least amount of registers, + // which in our case would be SGPRs. This is rarely what we want, so + // when we report excess/critical register pressure, we do it either + // only for VGPRs or only for SGPRs. + + // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs. + const int MaxVGPRPressureInc = 16; + bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; + bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit; + + + // FIXME: We have to enter REG-EXCESS before we reach the actual threshold + // to increase the likelihood we don't go over the limits. We should improve + // the analysis to look through dependencies to find the path with the least + // register pressure. + // FIXME: This is also necessary, because some passes that run after + // scheduling and before regalloc increase register pressure. + const int ErrorMargin = 3; + VGPRExcessLimit -= ErrorMargin; + SGPRExcessLimit -= ErrorMargin; + + // We only need to update the RPDelata for instructions that increase + // register pressure. Instructions that decrease or keep reg pressure + // the same will be marked as RegExcess in tryCandidate() when they + // are compared with instructions that increase the register pressure. + if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { + Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet()); + Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); + } + + if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) { + Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet()); + Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure = SGPRExcessLimit); + } + + // Register pressure is considered 'CRITICAL' if it is approaching a value + // that would reduce the wave occupancy for the execution unit. When + // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both + // has the same cost, so we don't need to prefer one over the other. + + VGPRCriticalLimit -= ErrorMargin; + SGPRCriticalLimit -= ErrorMargin; + + int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; + int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; + + if (SGPRDelta >= 0 || VGPRDelta >= 0) { + if (SGPRDelta > VGPRDelta) { + Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet()); + Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta); + } else { + Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet()); + Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta); + } + } +} + +// This function is mostly cut and pasted from +// GenericScheduler::pickNodeFromQueue() +void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, + const CandPolicy &ZonePolicy, + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { + const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>(); + const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); + ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); + unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()]; + unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()]; + unsigned SGPRExcessLimit = + Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass); + unsigned VGPRExcessLimit = + Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); + unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF); + unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true); + unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves); + + ReadyQueue &Q = Zone.Available; + for (SUnit *SU : Q) { + + SchedCandidate TryCand(ZonePolicy); + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, + SGPRPressure, VGPRPressure, + SGPRExcessLimit, VGPRExcessLimit, + SGPRCriticalLimit, VGPRCriticalLimit); + // Pass SchedBoundary only when comparing nodes from the same boundary. + SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; + GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); + if (TryCand.Reason != NoCand) { + // Initialize resource delta if needed in case future heuristics query it. + if (TryCand.ResDelta == SchedResourceDelta()) + TryCand.initResourceDelta(Zone.DAG, SchedModel); + Cand.setBest(TryCand); + } + } +} + +static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) { + switch (Reason) { + default: + return Reason; + case GenericSchedulerBase::RegCritical: + case GenericSchedulerBase::RegExcess: + return -Reason; + } +} + +// This function is mostly cut and pasted from +// GenericScheduler::pickNodeBidirectional() +SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + // Set the bottom-up policy based on the state of the current bottom zone and + // the instructions outside the zone, including the top zone. + CandPolicy BotPolicy; + setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + CandPolicy TopPolicy; + setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); + + // See if BotCand is still valid (because we previously scheduled from Top). + DEBUG(dbgs() << "Picking from Bot:\n"); + if (!BotCand.isValid() || BotCand.SU->isScheduled || + BotCand.Policy != BotPolicy) { + BotCand.reset(CandPolicy()); + pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + } else { + DEBUG(traceCandidate(BotCand)); + } + + // Check if the top Q has a better candidate. + DEBUG(dbgs() << "Picking from Top:\n"); + if (!TopCand.isValid() || TopCand.SU->isScheduled || + TopCand.Policy != TopPolicy) { + TopCand.reset(CandPolicy()); + pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + } else { + DEBUG(traceCandidate(TopCand)); + } + + // Pick best from BotCand and TopCand. + DEBUG( + dbgs() << "Top Cand: "; + traceCandidate(BotCand); + dbgs() << "Bot Cand: "; + traceCandidate(TopCand); + ); + SchedCandidate Cand; + if (TopCand.Reason == BotCand.Reason) { + Cand = BotCand; + GenericSchedulerBase::CandReason TopReason = TopCand.Reason; + TopCand.Reason = NoCand; + GenericScheduler::tryCandidate(Cand, TopCand, nullptr); + if (TopCand.Reason != NoCand) { + Cand.setBest(TopCand); + } else { + TopCand.Reason = TopReason; + } + } else { + if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) { + Cand = TopCand; + } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) { + Cand = BotCand; + } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) { + Cand = TopCand; + } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) { + Cand = BotCand; + } else { + int TopRank = getBidirectionalReasonRank(TopCand.Reason); + int BotRank = getBidirectionalReasonRank(BotCand.Reason); + if (TopRank > BotRank) { + Cand = TopCand; + } else { + Cand = BotCand; + } + } + } + DEBUG( + dbgs() << "Picking: "; + traceCandidate(Cand); + ); + + IsTopNode = Cand.AtTop; + return Cand.SU; +} + +// This function is mostly cut and pasted from +// GenericScheduler::pickNode() +SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + do { + if (RegionPolicy.OnlyTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + TopCand.reset(NoPolicy); + pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + SU = TopCand.SU; + } + IsTopNode = true; + } else if (RegionPolicy.OnlyBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + BotCand.reset(NoPolicy); + pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find a candidate"); + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidirectional(IsTopNode); + } + } while (SU->isScheduled); + + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + return SU; +} |