Diffstat (limited to 'contrib/llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 436
1 file changed, 216 insertions, 220 deletions
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index e06bc51..eaba9a5 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -13,29 +13,66 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/CodeGen/ScheduleDFS.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
 
 namespace llvm {
+
 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                            cl::desc("Force top-down list scheduling"));
 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
@@ -43,7 +80,8 @@ cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
 cl::opt<bool> DumpCriticalPathLength("misched-dcpl", cl::Hidden,
   cl::desc("Print critical path length to stdout"));
-}
+
+} // end namespace llvm
 
 #ifndef NDEBUG
 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
@@ -80,10 +118,6 @@ static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                         cl::desc("Enable memop clustering."),
                                         cl::init(true));
 
-// Experimental heuristics
-static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
-  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-
 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
   cl::desc("Verify machine instrs before and after machine scheduling"));
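Note: the DEBUG_TYPE rename above is user-visible. Per-file debug output now selects on the new string (-debug-only=machine-scheduler instead of -debug-only=misched), and the INITIALIZE_PASS macros later in this diff reuse DEBUG_TYPE so the registered pass name can no longer drift out of sync with it. A minimal sketch of the idiom, as a standalone illustration rather than part of the diff:

    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    #define DEBUG_TYPE "machine-scheduler"

    static void traceExample() {
      // Emitted only in an assertions-enabled build, and only when the tool
      // runs with -debug or -debug-only=machine-scheduler.
      DEBUG(llvm::dbgs() << "inspecting a scheduling region\n");
    }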
@@ -92,14 +126,14 @@ static const unsigned MinSubtreeSize = 8;
 
 // Pin the vtables to this file.
 void MachineSchedStrategy::anchor() {}
+
 void ScheduleDAGMutation::anchor() {}
 
 //===----------------------------------------------------------------------===//
 // Machine Instruction Scheduling Pass and Registry
 //===----------------------------------------------------------------------===//
 
-MachineSchedContext::MachineSchedContext():
-    MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
+MachineSchedContext::MachineSchedContext() {
   RegClassInfo = new RegisterClassInfo();
 }
 
@@ -108,6 +142,7 @@ MachineSchedContext::~MachineSchedContext() {
 }
 
 namespace {
+
 /// Base class for a machine scheduler class that can run at any point.
 class MachineSchedulerBase : public MachineSchedContext,
                              public MachineFunctionPass {
@@ -149,18 +184,20 @@ public:
 protected:
   ScheduleDAGInstrs *createPostMachineScheduler();
 };
-} // namespace
+
+} // end anonymous namespace
 
 char MachineScheduler::ID = 0;
 
 char &llvm::MachineSchedulerID = MachineScheduler::ID;
 
-INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
+INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
                       "Machine Instruction Scheduler", false, false)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
+INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
                     "Machine Instruction Scheduler", false, false)
 
 MachineScheduler::MachineScheduler()
@@ -211,7 +248,7 @@ static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
 /// MachineSchedOpt allows command line selection of the scheduler.
 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
-               RegisterPassParser<MachineSchedRegistry> >
+               RegisterPassParser<MachineSchedRegistry>>
 MachineSchedOpt("misched",
                 cl::init(&useDefaultMachineSched), cl::Hidden,
                 cl::desc("Machine instruction scheduler to use"));
@@ -448,7 +485,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
       // instruction stream until we find the nearest boundary.
       unsigned NumRegionInstrs = 0;
       MachineBasicBlock::iterator I = RegionEnd;
-      for (;I != MBB->begin(); --I) {
+      for (; I != MBB->begin(); --I) {
         MachineInstr &MI = *std::prev(I);
         if (isSchedBoundary(&MI, &*MBB, MF, TII))
           break;
@@ -495,7 +532,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
     // thumb2 size reduction is currently an exception, so the PostMIScheduler
     // needs to do this.
     if (FixKillFlags)
-      Scheduler.fixupKills(&*MBB);
+      Scheduler.fixupKills(*MBB);
   }
   Scheduler.finalizeSchedule();
 }
@@ -504,13 +541,14 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
   // unimplemented
 }
 
-LLVM_DUMP_METHOD
-void ReadyQueue::dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ReadyQueue::dump() const {
   dbgs() << "Queue " << Name << ": ";
-  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
-    dbgs() << Queue[i]->NodeNum << " ";
+  for (const SUnit *SU : Queue)
+    dbgs() << SU->NodeNum << " ";
   dbgs() << "\n";
 }
+#endif
 
 //===----------------------------------------------------------------------===//
 // ScheduleDAGMI - Basic machine instruction scheduling. This is
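Note: ReadyQueue::dump() is now const-qualified and compiled under the guard LLVM uses for all dump methods. The idiom, sketched on a hypothetical Example type (assumes llvm/Support/Compiler.h):

    #include "llvm/Support/Compiler.h"
    #include "llvm/Support/raw_ostream.h"

    struct Example {
    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      // Compiled out of release builds unless LLVM_ENABLE_DUMP is defined;
      // LLVM_DUMP_METHOD marks the function used and noinline so it remains
      // callable from a debugger.
      LLVM_DUMP_METHOD void dump() const { llvm::dbgs() << "Example\n"; }
    #endif
    };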
@@ -519,8 +557,7 @@ void ReadyQueue::dump() {
 // ===----------------------------------------------------------------------===/
 
 // Provide a vtable anchor.
-ScheduleDAGMI::~ScheduleDAGMI() {
-}
+ScheduleDAGMI::~ScheduleDAGMI() = default;
 
 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
@@ -572,10 +609,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 
 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
-  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    releaseSucc(SU, &*I);
-  }
+  for (SDep &Succ : SU->Succs)
+    releaseSucc(SU, &Succ);
 }
 
 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
@@ -611,10 +646,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 
 /// releasePredecessors - Call releasePred on each of SU's predecessors.
 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
-  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    releasePred(SU, &*I);
-  }
+  for (SDep &Pred : SU->Preds)
+    releasePred(SU, &Pred);
 }
 
 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
@@ -687,8 +720,8 @@ void ScheduleDAGMI::schedule() {
   DEBUG(
     if (EntrySU.getInstr() != nullptr)
       EntrySU.dumpAll(this);
-    for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
-      SUnits[su].dumpAll(this);
+    for (const SUnit &SU : SUnits)
+      SU.dumpAll(this);
     if (ExitSU.getInstr() != nullptr)
       ExitSU.dumpAll(this);
   );
@@ -749,28 +782,25 @@ void ScheduleDAGMI::schedule() {
 
 /// Apply each ScheduleDAGMutation step in order.
 void ScheduleDAGMI::postprocessDAG() {
-  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
-    Mutations[i]->apply(this);
-  }
+  for (auto &m : Mutations)
+    m->apply(this);
 }
 
 void ScheduleDAGMI::
 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                       SmallVectorImpl<SUnit*> &BotRoots) {
-  for (std::vector<SUnit>::iterator
-         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
-    SUnit *SU = &(*I);
-    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+  for (SUnit &SU : SUnits) {
+    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
 
     // Order predecessors so DFSResult follows the critical path.
-    SU->biasCriticalPath();
+    SU.biasCriticalPath();
 
     // A SUnit is ready to top schedule if it has no predecessors.
-    if (!I->NumPredsLeft)
-      TopRoots.push_back(SU);
+    if (!SU.NumPredsLeft)
+      TopRoots.push_back(&SU);
     // A SUnit is ready to bottom schedule if it has no successors.
-    if (!I->NumSuccsLeft)
-      BotRoots.push_back(SU);
+    if (!SU.NumSuccsLeft)
+      BotRoots.push_back(&SU);
   }
   ExitSU.biasCriticalPath();
 }
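Note: the bulk of this diff is mechanical modernization; explicit iterator loops become range-based for loops with no change in behavior. The shape of the rewrite, taken from releaseSuccessors above:

    // Before: iterator pair, with &*I to recover an SDep* for the callee.
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I)
      releaseSucc(SU, &*I);

    // After: bind each element by reference; &Succ preserves the SDep*
    // interface of releaseSucc.
    for (SDep &Succ : SU->Succs)
      releaseSucc(SU, &Succ);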
@@ -785,10 +815,9 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
   //
   // Nodes with unreleased weak edges can still be roots.
   // Release top roots in forward order.
-  for (SmallVectorImpl<SUnit*>::const_iterator
-         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
-    SchedImpl->releaseTopNode(*I);
-  }
+  for (SUnit *SU : TopRoots)
+    SchedImpl->releaseTopNode(SU);
+
   // Release bottom roots in reverse order so the higher priority nodes appear
   // first. This is more natural and slightly more efficient.
   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
@@ -825,7 +854,7 @@ void ScheduleDAGMI::placeDebugValues() {
     RegionBegin = FirstDbgValue;
   }
 
-  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
     MachineInstr *DbgValue = P.first;
@@ -841,7 +870,7 @@ void ScheduleDAGMI::placeDebugValues() {
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ScheduleDAGMI::dumpSchedule() const {
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
     if (SUnit *SU = getSUnit(&(*MI)))
       SU->dump(this);
@@ -992,9 +1021,9 @@ void ScheduleDAGMILive::initRegPressure() {
     }
   }
   DEBUG(dbgs() << "Excess PSets: ";
-        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+        for (const PressureChange &RCPS : RegionCriticalPSets)
          dbgs() << TRI->getRegPressureSetName(
-              RegionCriticalPSets[i].getPSet()) << " ";
+              RCPS.getPSet()) << " ";
        dbgs() << "\n");
 }
 
@@ -1003,16 +1032,15 @@ updateScheduledPressure(const SUnit *SU,
                         const std::vector<unsigned> &NewMaxPressure) {
   const PressureDiff &PDiff = getPressureDiff(SU);
   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
-  for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
-       I != E; ++I) {
-    if (!I->isValid())
+  for (const PressureChange &PC : PDiff) {
+    if (!PC.isValid())
       break;
-    unsigned ID = I->getPSet();
+    unsigned ID = PC.getPSet();
     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
       ++CritIdx;
     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
-          && NewMaxPressure[ID] <= INT16_MAX)
+          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
     }
     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
@@ -1136,6 +1164,12 @@ void ScheduleDAGMILive::schedule() {
         dbgs() << "  Pressure Diff      : ";
         getPressureDiff(&SU).dump(*TRI);
       }
+      dbgs() << "  Single Issue       : ";
+      if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+          SchedModel.mustEndGroup(SU.getInstr()))
+        dbgs() << "true;";
+      else
+        dbgs() << "false;";
       dbgs() << '\n';
     }
     if (ExitSU.getInstr() != nullptr)
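Note: the "Single Issue" line added to the debug dump reports a property of the issue-group model exercised later in this diff: an instruction that must both begin and end a group issues alone in its cycle. A sketch of the predicate (assumes llvm/CodeGen/TargetSchedule.h; the helper name is illustrative):

    #include "llvm/CodeGen/TargetSchedule.h"

    // True when nothing else may share MI's issue cycle: MI must begin a
    // group and must end it, so it is a complete group by itself.
    static bool isSingleIssue(const llvm::TargetSchedModel &SM,
                              const llvm::MachineInstr *MI) {
      return SM.mustBeginGroup(MI) && SM.mustEndGroup(MI);
    }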
@@ -1396,6 +1430,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
 //===----------------------------------------------------------------------===//
 
 namespace {
+
 /// \brief Post-process the DAG to create cluster edges between neighboring
 /// loads or between neighboring stores.
 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
@@ -1403,6 +1438,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
     SUnit *SU;
     unsigned BaseReg;
     int64_t Offset;
+
     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
         : SU(su), BaseReg(reg), Offset(ofs) {}
@@ -1439,31 +1475,31 @@ public:
   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
       : BaseMemOpClusterMutation(tii, tri, true) {}
 };
-} // anonymous
+
+} // end anonymous namespace
 
 namespace llvm {
 
 std::unique_ptr<ScheduleDAGMutation>
 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI) {
-  return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
+  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
                             : nullptr;
 }
 
 std::unique_ptr<ScheduleDAGMutation>
 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI) {
-  return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
+  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
                             : nullptr;
 }
 
-} // namespace llvm
+} // end namespace llvm
 
 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
   SmallVector<MemOpInfo, 32> MemOpRecords;
-  for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
-    SUnit *SU = MemOps[Idx];
+  for (SUnit *SU : MemOps) {
     unsigned BaseReg;
     int64_t Offset;
     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
@@ -1491,12 +1527,11 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
       //   dependent on SUa can prevent load combining due to register reuse.
       // Predecessor edges do not need to be copied from SUb to SUa since nearby
       // loads should have effectively the same inputs.
-      for (SUnit::const_succ_iterator
-             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
-        if (SI->getSUnit() == SUb)
+      for (const SDep &Succ : SUa->Succs) {
+        if (Succ.getSUnit() == SUb)
           continue;
-        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
-        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+        DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
+        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
       }
       ++ClusterLength;
     } else
@@ -1513,17 +1548,15 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
   DenseMap<unsigned, unsigned> StoreChainIDs;
   // Map each store chain to a set of dependent MemOps.
   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
-  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
-    SUnit *SU = &DAG->SUnits[Idx];
-    if ((IsLoad && !SU->getInstr()->mayLoad()) ||
-        (!IsLoad && !SU->getInstr()->mayStore()))
+  for (SUnit &SU : DAG->SUnits) {
+    if ((IsLoad && !SU.getInstr()->mayLoad()) ||
+        (!IsLoad && !SU.getInstr()->mayStore()))
       continue;
 
     unsigned ChainPredID = DAG->SUnits.size();
-    for (SUnit::const_pred_iterator
-           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
-      if (PI->isCtrl()) {
-        ChainPredID = PI->getSUnit()->NodeNum;
+    for (const SDep &Pred : SU.Preds) {
+      if (Pred.isCtrl()) {
+        ChainPredID = Pred.getSUnit()->NodeNum;
         break;
       }
     }
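Note: clusterNeighboringMemOps sorts its candidates by (BaseReg, Offset) so memops off the same base register become adjacent before any cluster edges are added. A standalone sketch of that ordering; this MemOpInfo mirrors the struct above, and the comparator is illustrative:

    #include <algorithm>
    #include <cstdint>
    #include <tuple>
    #include <vector>

    struct MemOpInfo {
      unsigned BaseReg;
      int64_t Offset;
      bool operator<(const MemOpInfo &RHS) const {
        // Group by base register first, then by increasing offset.
        return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
      }
    };

    static void sortCandidates(std::vector<MemOpInfo> &Records) {
      std::sort(Records.begin(), Records.end());
    }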
@@ -1534,82 +1567,12 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
     if (Result.second)
       StoreChainDependents.resize(NumChains + 1);
-    StoreChainDependents[Result.first->second].push_back(SU);
+    StoreChainDependents[Result.first->second].push_back(&SU);
   }
 
   // Iterate over the store chains.
-  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
-    clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
-}
-
-//===----------------------------------------------------------------------===//
-// MacroFusion - DAG post-processing to encourage fusion of macro ops.
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// \brief Post-process the DAG to create cluster edges between instructions
-/// that may be fused by the processor into a single operation.
-class MacroFusion : public ScheduleDAGMutation {
-  const TargetInstrInfo &TII;
-public:
-  MacroFusion(const TargetInstrInfo &TII)
-    : TII(TII) {}
-
-  void apply(ScheduleDAGInstrs *DAGInstrs) override;
-};
-} // anonymous
-
-namespace llvm {
-
-std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(const TargetInstrInfo *TII) {
-  return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
-}
-
-} // namespace llvm
-
-/// \brief Callback from DAG postProcessing to create cluster edges to encourage
-/// fused operations.
-void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
-  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
-  // For now, assume targets can only fuse with the branch.
-  SUnit &ExitSU = DAG->ExitSU;
-  MachineInstr *Branch = ExitSU.getInstr();
-  if (!Branch)
-    return;
-
-  for (SDep &PredDep : ExitSU.Preds) {
-    if (PredDep.isWeak())
-      continue;
-    SUnit &SU = *PredDep.getSUnit();
-    MachineInstr &Pred = *SU.getInstr();
-    if (!TII.shouldScheduleAdjacent(Pred, *Branch))
-      continue;
-
-    // Create a single weak edge from SU to ExitSU. The only effect is to cause
-    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
-    // need to copy predecessor edges from ExitSU to SU, since top-down
-    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
-    // of SU, we could create an artificial edge from the deepest root, but it
-    // hasn't been needed yet.
-    bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
-    (void)Success;
-    assert(Success && "No DAG nodes should be reachable from ExitSU");
-
-    // Adjust latency of data deps between the nodes.
-    for (SDep &PredDep : ExitSU.Preds) {
-      if (PredDep.getSUnit() == &SU)
-        PredDep.setLatency(0);
-    }
-    for (SDep &SuccDep : SU.Succs) {
-      if (SuccDep.getSUnit() == &ExitSU)
-        SuccDep.setLatency(0);
-    }
-
-    DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
-    break;
-  }
+  for (auto &SCD : StoreChainDependents)
+    clusterNeighboringMemOps(SCD, DAG);
 }
 
 //===----------------------------------------------------------------------===//
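Note: the MacroFusion mutation and its misched-fusion flag are deleted outright rather than rewritten; upstream LLVM moved macro-op fusion into its own ScheduleDAGMutation (lib/CodeGen/MacroFusion.cpp) around this time, which is why no replacement appears in this file. The underlying mechanism is unchanged there: a weak SDep::Cluster edge biases the scheduler to keep the fused pair adjacent without constraining correctness, as in the removed code above:

    // Weak cluster edge: a scheduling hint only. addEdge may legally refuse
    // a weak edge that would form a cycle, hence the checked result.
    bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
    assert(Success && "No DAG nodes should be reachable from ExitSU");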
@@ -1617,6 +1580,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
 //===----------------------------------------------------------------------===//
 
 namespace {
+
 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
 /// the one use that defines the copy's source vreg, most likely an induction
 /// variable increment.
@@ -1626,6 +1590,7 @@ class CopyConstrain : public ScheduleDAGMutation {
   // RegionEndIdx is the slot index of the last non-debug instruction in the
   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
   SlotIndex RegionEndIdx;
+
 public:
   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
 
@@ -1634,17 +1599,18 @@ public:
 protected:
   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
 };
-} // anonymous
+
+} // end anonymous namespace
 
 namespace llvm {
 
 std::unique_ptr<ScheduleDAGMutation>
 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
-                             const TargetRegisterInfo *TRI) {
-  return make_unique<CopyConstrain>(TII, TRI);
+                               const TargetRegisterInfo *TRI) {
+  return llvm::make_unique<CopyConstrain>(TII, TRI);
 }
 
-} // namespace llvm
+} // end namespace llvm
 
 /// constrainLocalCopy handles two possibilities:
 /// 1) Local src:
@@ -1749,16 +1715,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
-  for (SUnit::const_succ_iterator
-         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
-       I != E; ++I) {
-    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+  for (const SDep &Succ : LastLocalSU->Succs) {
+    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
       continue;
-    if (I->getSUnit() == GlobalSU)
+    if (Succ.getSUnit() == GlobalSU)
       continue;
-    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
       return;
-    LocalUses.push_back(I->getSUnit());
+    LocalUses.push_back(Succ.getSUnit());
   }
   // Open the top of the GlobalLI hole by constraining any earlier global uses
   // to precede the start of LocalLI.
@@ -1766,15 +1730,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
   MachineInstr *FirstLocalDef =
     LIS->getInstructionFromIndex(LocalLI->beginIndex());
   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
-  for (SUnit::const_pred_iterator
-         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
-    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+  for (const SDep &Pred : GlobalSU->Preds) {
+    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
      continue;
-    if (I->getSUnit() == FirstLocalSU)
+    if (Pred.getSUnit() == FirstLocalSU)
      continue;
-    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
      return;
-    GlobalUses.push_back(I->getSUnit());
+    GlobalUses.push_back(Pred.getSUnit());
   }
   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
   // Add the weak edges.
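Note: constrainLocalCopy only ever adds weak edges, which order nodes heuristically and, unlike real dependencies, may be rejected if they would create a cycle; that is why every candidate passes a canAddEdge check above before anything is committed. The code following this hunk applies them roughly like so (a paraphrase of the unchanged code, not part of the diff):

    // Weak edges pull the copy's local uses after the global use, and the
    // earlier global uses before the first local def, freeing the register
    // hole without adding real dependencies.
    for (SUnit *LU : LocalUses)
      DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
    for (SUnit *GU : GlobalUses)
      DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));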
@@ -1805,12 +1768,11 @@ void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
       *priorNonDebug(DAG->end(), DAG->begin()));
 
-  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
-    SUnit *SU = &DAG->SUnits[Idx];
-    if (!SU->getInstr()->isCopy())
+  for (SUnit &SU : DAG->SUnits) {
+    if (!SU.getInstr()->isCopy())
       continue;
 
-    constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
+    constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
   }
 }
 
@@ -1836,7 +1798,7 @@ void SchedBoundary::reset() {
   CheckPending = false;
   CurrCycle = 0;
   CurrMOps = 0;
-  MinReadyCycle = UINT_MAX;
+  MinReadyCycle = std::numeric_limits<unsigned>::max();
   ExpectedLatency = 0;
   DependentLatency = 0;
   RetiredMOps = 0;
@@ -1861,10 +1823,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
   if (!SchedModel->hasInstrSchedModel())
     return;
   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
-  for (std::vector<SUnit>::iterator
-         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
-    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
-    RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
+  for (SUnit &SU : DAG->SUnits) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
+    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
       * SchedModel->getMicroOpFactor();
     for (TargetSchedModel::ProcResIter
            PI = SchedModel->getWriteProcResBegin(SC),
@@ -1937,12 +1898,22 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
     return true;
   }
+
   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
     return true;
   }
+
+  if (CurrMOps > 0 &&
+      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
+       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
+    DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
+                 << (isTop()? "begin" : "end") << " group\n");
+    return true;
+  }
+
   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
     for (TargetSchedModel::ProcResIter
@@ -1968,12 +1939,11 @@ unsigned SchedBoundary::
 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
   SUnit *LateSU = nullptr;
   unsigned RemLatency = 0;
-  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
-       I != E; ++I) {
-    unsigned L = getUnscheduledLatency(*I);
+  for (SUnit *SU : ReadySUs) {
+    unsigned L = getUnscheduledLatency(SU);
     if (L > RemLatency) {
       RemLatency = L;
-      LateSU = *I;
+      LateSU = SU;
     }
   }
   if (LateSU) {
@@ -2039,7 +2009,8 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
 /// Move the boundary of scheduled code by one cycle.
 void SchedBoundary::bumpCycle(unsigned NextCycle) {
   if (SchedModel->getMicroOpBufferSize() == 0) {
-    assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+           "MinReadyCycle uninitialized");
     if (MinReadyCycle > NextCycle)
       NextCycle = MinReadyCycle;
   }
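Note: the new checkHazard clause above enforces issue-group boundaries. Scheduling top-down, an instruction that must begin a group cannot issue into a cycle that already holds micro-ops (CurrMOps > 0); bottom-up mirrors this with mustEndGroup, because that zone fills each cycle in reverse. A worked example (illustrative):

    // IssueWidth = 2, top-down zone:
    //   cycle 0: issue A                 -> CurrMOps == 1
    //   candidate B needs mustBeginGroup -> checkHazard(B) == true
    //   bumpCycle(1)                     -> CurrMOps == 0
    //   cycle 1: issue B                 -> B starts its group on a clean cycle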
@@ -2237,6 +2208,18 @@ void SchedBoundary::bumpNode(SUnit *SU) {
   // one cycle.  Since we commonly reach the max MOps here, opportunistically
   // bump the cycle to avoid uselessly checking everything in the readyQ.
   CurrMOps += IncMOps;
+
+  // Bump the cycle count for issue group constraints.
+  // This must be done after NextCycle has been adjust for all other stalls.
+  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
+  // currCycle to X.
+  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
+      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
+    DEBUG(dbgs() << "  Bump cycle to "
+                 << (isTop() ? "end" : "begin") << " group\n");
+    bumpCycle(++NextCycle);
+  }
+
   while (CurrMOps >= SchedModel->getIssueWidth()) {
     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
           << " at cycle " << CurrCycle << '\n');
@@ -2250,7 +2233,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
 void SchedBoundary::releasePending() {
   // If the available queue is empty, it is safe to reset MinReadyCycle.
   if (Available.empty())
-    MinReadyCycle = UINT_MAX;
+    MinReadyCycle = std::numeric_limits<unsigned>::max();
 
   // Check to see if any of the pending instructions are ready to issue.  If
   // so, add them to the available queue.
@@ -2323,10 +2306,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
   return nullptr;
 }
 
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 // This is useful information to dump after bumpNode.
 // Note that the Queue contents are more useful before pickNodeFromQueue.
-void SchedBoundary::dumpScheduledState() {
+LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
   unsigned ResFactor;
   unsigned ResCount;
   if (ZoneCritResIdx) {
@@ -2665,12 +2648,15 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
   }
 }
 
-void GenericScheduler::dumpPolicy() {
+void GenericScheduler::dumpPolicy() const {
+  // Cannot completely remove virtual function even in release mode.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   dbgs() << "GenericScheduler RegionPolicy: "
          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
        << "\n";
+#endif
 }
 
 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
@@ -2714,17 +2700,16 @@ void GenericScheduler::registerRoots() {
   Rem.CriticalPath = DAG->ExitSU.getDepth();
 
   // Some roots may not feed into ExitSU. Check all of them in case.
-  for (std::vector<SUnit*>::const_iterator
-         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
-    if ((*I)->getDepth() > Rem.CriticalPath)
-      Rem.CriticalPath = (*I)->getDepth();
+  for (const SUnit *SU : Bot.Available) {
+    if (SU->getDepth() > Rem.CriticalPath)
+      Rem.CriticalPath = SU->getDepth();
   }
   DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
   if (DumpCriticalPathLength) {
     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
   }
 
-  if (EnableCyclicPath) {
+  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
     checkAcyclicLatency();
   }
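Note: registerRoots now skips the cyclic-critical-path computation on bufferless machine models. The cyclic critical path estimates how far independent loop iterations can overlap, which presupposes that the core can buffer micro-ops, so on a strictly in-order model the computation is wasted work. Roughly, per the MCSchedModel documentation:

    // MicroOpBufferSize == 0 models a strictly in-order core; a nonzero size
    // lets micro-ops wait in a buffer, which is what allows independent loop
    // iterations to overlap and makes the cyclic critical path meaningful.
    if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
      Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
      checkAcyclicLatency();
    }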
@@ -2964,10 +2949,10 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
 
   ReadyQueue &Q = Zone.Available;
-  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+  for (SUnit *SU : Q) {
 
     SchedCandidate TryCand(ZonePolicy);
-    initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
     // Pass SchedBoundary only when comparing nodes from the same boundary.
     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
     tryCandidate(Cand, TryCand, ZoneArg);
@@ -3106,7 +3091,6 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
 }
 
 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
-
   MachineBasicBlock::iterator InsertPos = SU->getInstr();
   if (!isTop)
     ++InsertPos;
@@ -3114,18 +3098,17 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
 
   // Find already scheduled copies with a single physreg dependence and move
   // them just above the scheduled instruction.
-  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
-       I != E; ++I) {
-    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+  for (SDep &Dep : Deps) {
+    if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
       continue;
-    SUnit *DepSU = I->getSUnit();
+    SUnit *DepSU = Dep.getSUnit();
     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
       continue;
     MachineInstr *Copy = DepSU->getInstr();
     if (!Copy->isCopy())
       continue;
     DEBUG(dbgs() << "  Rescheduling physreg copy ";
-          I->getSUnit()->dump(DAG));
+          Dep.getSUnit()->dump(DAG));
     DAG->moveInstruction(Copy, InsertPos);
   }
 }
@@ -3154,7 +3137,8 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
 /// Create the standard converging machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
-  ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
+  ScheduleDAGMILive *DAG =
+      new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
   // Register DAG post-processors.
   //
   // FIXME: extend the mutation API to allow earlier mutations to instantiate
@@ -3195,15 +3179,13 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
   }
 }
 
-
 void PostGenericScheduler::registerRoots() {
   Rem.CriticalPath = DAG->ExitSU.getDepth();
 
   // Some roots may not feed into ExitSU. Check all of them in case.
-  for (SmallVectorImpl<SUnit*>::const_iterator
-         I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
-    if ((*I)->getDepth() > Rem.CriticalPath)
-      Rem.CriticalPath = (*I)->getDepth();
+  for (const SUnit *SU : BotRoots) {
+    if (SU->getDepth() > Rem.CriticalPath)
+      Rem.CriticalPath = SU->getDepth();
   }
   DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
   if (DumpCriticalPathLength) {
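Note: the hunk that follows teaches PostGenericScheduler the cluster tie-break that GenericScheduler already performs, so cluster edges (for example, from the memop mutation above) keep their effect through post-RA scheduling. It uses the tryGreater helper defined elsewhere in this file; its contract, paraphrased:

    // Paraphrase of tryGreater: prefer the candidate whose value is larger,
    // record the deciding reason, and report whether the contest was decided.
    static bool tryGreater(int TryVal, int CandVal,
                           GenericSchedulerBase::SchedCandidate &TryCand,
                           GenericSchedulerBase::SchedCandidate &Cand,
                           GenericSchedulerBase::CandReason Reason) {
      if (TryVal > CandVal) {
        TryCand.Reason = Reason;
        return true;
      }
      if (TryVal < CandVal) {
        if (Cand.Reason > Reason)
          Cand.Reason = Reason;
        return true;
      }
      return false;
    }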
@@ -3229,6 +3211,12 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
                   Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
     return;
 
+  // Keep clustered nodes together.
+  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
+                 Cand.SU == DAG->getNextClusterSucc(),
+                 TryCand, Cand, Cluster))
+    return;
+
   // Avoid critical resource consumption and balance the schedule.
   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
               TryCand, Cand, ResourceReduce))
@@ -3250,9 +3238,9 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
 
 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
   ReadyQueue &Q = Top.Available;
-  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+  for (SUnit *SU : Q) {
     SchedCandidate TryCand(Cand.Policy);
-    TryCand.SU = *I;
+    TryCand.SU = SU;
     TryCand.AtTop = true;
     TryCand.initResourceDelta(DAG, SchedModel);
     tryCandidate(Cand, TryCand);
@@ -3302,7 +3290,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
 }
 
 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
-  return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
+  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
                            /*RemoveKillFlags=*/true);
 }
 
@@ -3311,14 +3299,14 @@ ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
 //===----------------------------------------------------------------------===//
 
 namespace {
+
 /// \brief Order nodes by the ILP metric.
 struct ILPOrder {
-  const SchedDFSResult *DFSResult;
-  const BitVector *ScheduledTrees;
+  const SchedDFSResult *DFSResult = nullptr;
+  const BitVector *ScheduledTrees = nullptr;
   bool MaximizeILP;
 
-  ILPOrder(bool MaxILP)
-    : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
+  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
 
   /// \brief Apply a less-than relation on node priority.
   ///
@@ -3347,12 +3335,13 @@ struct ILPOrder {
 
 /// \brief Schedule based on the ILP metric.
 class ILPScheduler : public MachineSchedStrategy {
-  ScheduleDAGMILive *DAG;
+  ScheduleDAGMILive *DAG = nullptr;
   ILPOrder Cmp;
 
   std::vector<SUnit*> ReadyQ;
+
 public:
-  ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
+  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
 
   void initialize(ScheduleDAGMI *dag) override {
     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
@@ -3405,14 +3394,16 @@ public:
     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   }
 };
-} // namespace
+
+} // end anonymous namespace
 
 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
-  return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
+  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
 }
 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
-  return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
+  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
 }
+
 static MachineSchedRegistry ILPMaxRegistry(
   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
 static MachineSchedRegistry ILPMinRegistry(
@@ -3424,6 +3415,7 @@ static MachineSchedRegistry ILPMinRegistry(
 
 #ifndef NDEBUG
 namespace {
+
 /// Apply a less-than relation on the node order, which corresponds to the
 /// instruction order prior to scheduling. IsReverse implements greater-than.
 template<bool IsReverse>
@@ -3444,11 +3436,12 @@ class InstructionShuffler : public MachineSchedStrategy {
   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
   // gives nodes with a higher number higher priority causing the latest
   // instructions to be scheduled first.
-  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
     TopQ;
 
   // When scheduling bottom-up, use greater-than as the queue priority.
-  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
     BottomQ;
+
 public:
   InstructionShuffler(bool alternate, bool topdown)
     : IsAlternating(alternate), IsTopDown(topdown) {}
@@ -3492,15 +3485,18 @@ public:
     BottomQ.push(SU);
   }
 };
-} // namespace
+
+} // end anonymous namespace
 
 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
   bool Alternate = !ForceTopDown && !ForceBottomUp;
   bool TopDown = !ForceBottomUp;
   assert((TopDown || !ForceTopDown) &&
          "-misched-topdown incompatible with -misched-bottomup");
-  return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
+  return new ScheduleDAGMILive(
+      C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
 }
+
 static MachineSchedRegistry ShufflerRegistry(
   "shuffle", "Shuffle machine instructions alternating directions",
   createInstructionShuffler);
@@ -3518,8 +3514,7 @@ template<> struct GraphTraits<
 
 template<>
 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
-
-  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
   static std::string getGraphName(const ScheduleDAG *G) {
     return G->MF.getName();
@@ -3576,7 +3571,8 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
     return Str;
   }
 };
-} // namespace llvm
+
+} // end namespace llvm
 #endif // NDEBUG
 
 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
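Note: the DOTGraphTraits specialization above is what backs ScheduleDAGMI::viewGraph and the -view-misched-dags option declared near the top of this file. A sketch of the GraphWriter hookup (assumes llvm/Support/GraphWriter.h; the wrapper name is illustrative):

    #include "llvm/Support/GraphWriter.h"

    // ViewGraph writes the DAG to a temporary .dot file and opens a viewer;
    // node and edge rendering comes from DOTGraphTraits<ScheduleDAGMI*>.
    static void viewMISchedDAG(llvm::ScheduleDAGMI *DAG) {
      llvm::ViewGraph(DAG, "misched", /*ShortNames=*/false,
                      "Scheduling-Units Graph");
    }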