Diffstat (limited to 'contrib/llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--   contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 322
1 file changed, 303 insertions, 19 deletions
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index 5bd2349..fff6b2b 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -51,7 +51,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
 
-// Experimental heuristics
+// FIXME: remove this flag after initial testing. It should always be a good
+// thing.
+static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden,
+    cl::desc("Constrain vreg copies."), cl::init(true));
+
 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
   cl::desc("Enable load clustering."), cl::init(true));
@@ -323,6 +327,10 @@ ScheduleDAGMI::~ScheduleDAGMI() {
   delete SchedImpl;
 }
 
+bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
   if (SuccSU != &ExitSU) {
     // Do not use WillCreateCycle, it assumes SD scheduling.
@@ -404,6 +412,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
   }
 }
 
+/// This is normally called from the main scheduler loop but may also be
+/// invoked by the scheduling strategy to perform additional code motion.
 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                     MachineBasicBlock::iterator InsertPos) {
   // Advance RegionBegin if the first instruction moves down.
@@ -505,6 +515,14 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
     if ((int)NewMaxPressure[ID] > MaxUnits)
       MaxUnits = NewMaxPressure[ID];
   }
+  DEBUG(
+    for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
+      unsigned Limit = TRI->getRegPressureSetLimit(i);
+      if (NewMaxPressure[i] > Limit) {
+        dbgs() << "  " << TRI->getRegPressureSetName(i) << ": "
+               << NewMaxPressure[i] << " > " << Limit << "\n";
+      }
+    });
 }
 
 /// schedule - Called back from MachineScheduler::runOnMachineFunction
@@ -905,6 +923,184 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
 }
 
 //===----------------------------------------------------------------------===//
+// CopyConstrain - DAG post-processing to encourage copy elimination.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create weak edges from all uses of a copy to
+/// the one use that defines the copy's source vreg, most likely an induction
+/// variable increment.
+class CopyConstrain : public ScheduleDAGMutation {
+  // Transient state.
+  SlotIndex RegionBeginIdx;
+  // RegionEndIdx is the slot index of the last non-debug instruction in the
+  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
+  SlotIndex RegionEndIdx;
+public:
+  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
+
+  virtual void apply(ScheduleDAGMI *DAG);
+
+protected:
+  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// constrainLocalCopy handles two possibilities:
+/// 1) Local src:
+///  I0:     = dst
+///  I1: src = ...
+///  I2:     = dst
+///  I3: dst = src (copy)
+/// (create pred->succ edges I0->I1, I2->I1)
+///
+/// 2) Local copy:
+///  I0: dst = src (copy)
+///  I1:     = dst
+///  I2: src = ...
+///  I3:     = dst
+/// (create pred->succ edges I1->I2, I3->I2)
+///
+/// Although the MachineScheduler is currently constrained to single blocks,
+/// this algorithm should handle extended blocks. An EBB is a set of
+/// contiguously numbered blocks such that the previous block in the EBB is
+/// always the single predecessor.
+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
+  LiveIntervals *LIS = DAG->getLIS();
+  MachineInstr *Copy = CopySU->getInstr();
+
+  // Check for pure vreg copies.
+  unsigned SrcReg = Copy->getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    return;
+
+  unsigned DstReg = Copy->getOperand(0).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+    return;
+
+  // Check if either the dest or source is local. If it's live across a back
+  // edge, it's not local. Note that if both vregs are live across the back
+  // edge, we cannot successfully constrain the copy without cyclic scheduling.
+  unsigned LocalReg = DstReg;
+  unsigned GlobalReg = SrcReg;
+  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
+  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
+    LocalReg = SrcReg;
+    GlobalReg = DstReg;
+    LocalLI = &LIS->getInterval(LocalReg);
+    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
+      return;
+  }
+  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
+
+  // Find the global segment after the start of the local LI.
+  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
+  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
+  // local live range. We could create edges from other global uses to the
+  // local start, but the coalescer should have already eliminated these
+  // cases, so don't bother dealing with it.
+  if (GlobalSegment == GlobalLI->end())
+    return;
+
+  // If GlobalSegment is killed at the LocalLI->start, the call to find()
+  // returned the next global segment. But if GlobalSegment overlaps with
+  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
+  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
+  if (GlobalSegment->contains(LocalLI->beginIndex()))
+    ++GlobalSegment;
+
+  if (GlobalSegment == GlobalLI->end())
+    return;
+
+  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
+  if (GlobalSegment != GlobalLI->begin()) {
+    // Two address defs have no hole.
+    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
+                               GlobalSegment->start)) {
+      return;
+    }
+    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
+    // it would be a disconnected component in the live range.
+    assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
+           "Disconnected LRG within the scheduling region.");
+  }
+  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
+  if (!GlobalDef)
+    return;
+
+  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
+  if (!GlobalSU)
+    return;
+
+  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
+  // constraining the uses of the last local def to precede GlobalDef.
+  SmallVector<SUnit*,8> LocalUses;
+  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
+  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
+  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
+  for (SUnit::const_succ_iterator
+         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
+       I != E; ++I) {
+    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+      continue;
+    if (I->getSUnit() == GlobalSU)
+      continue;
+    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+      return;
+    LocalUses.push_back(I->getSUnit());
+  }
+  // Open the top of the GlobalLI hole by constraining any earlier global uses
+  // to precede the start of LocalLI.
+  SmallVector<SUnit*,8> GlobalUses;
+  MachineInstr *FirstLocalDef =
+    LIS->getInstructionFromIndex(LocalLI->beginIndex());
+  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
+  for (SUnit::const_pred_iterator
+         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
+    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+      continue;
+    if (I->getSUnit() == FirstLocalSU)
+      continue;
+    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+      return;
+    GlobalUses.push_back(I->getSUnit());
+  }
+  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
+  // Add the weak edges.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
+    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
+          << GlobalSU->NodeNum << ")\n");
+    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
+  }
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
+    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
+          << FirstLocalSU->NodeNum << ")\n");
+    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
+  }
+}
+
+/// \brief Callback from DAG postProcessing to create weak edges to encourage
+/// copy elimination.
+void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
+  if (FirstPos == DAG->end())
+    return;
+  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
+    &*priorNonDebug(DAG->end(), DAG->begin()));
+
+  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+    SUnit *SU = &DAG->SUnits[Idx];
+    if (!SU->getInstr()->isCopy())
+      continue;
+
+    constrainLocalCopy(SU, DAG);
+  }
+}
+
+//===----------------------------------------------------------------------===//
 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
@@ -916,7 +1112,7 @@ public:
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
-    NoCand, SingleExcess, SingleCritical, Cluster,
+    NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
     TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
     NodeOrder};
@@ -1191,6 +1387,8 @@ protected:
                     const RegPressureTracker &RPTracker,
                     SchedCandidate &Candidate);
 
+  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+
 #ifndef NDEBUG
   void traceCandidate(const SchedCandidate &Cand);
 #endif
@@ -1339,6 +1537,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
   for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
        I != E; ++I) {
     unsigned L = getUnscheduledLatency(*I);
+    DEBUG(dbgs() << "  " << Available.getName()
+          << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
     if (L > RemLatency)
       RemLatency = L;
   }
@@ -1349,10 +1549,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
     RemLatency = L;
   }
   unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+  DEBUG(dbgs() << "  " << Available.getName()
+        << " ExpectedLatency " << ExpectedLatency
+        << " CP Limit " << CriticalPathLimit << '\n');
   if (RemLatency + ExpectedLatency >= CriticalPathLimit
       && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
     Policy.ReduceLatency = true;
-    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+    DEBUG(dbgs() << "  Increase ILP: " << Available.getName() << '\n');
   }
 }
@@ -1569,7 +1772,8 @@ void ConvergingScheduler::balanceZones(
   if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
       > (int)SchedModel->getLatencyFactor()) {
     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+    DEBUG(dbgs() << "  Balance " << CriticalZone.Available.getName()
+          << " reduce "
           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
           << '\n');
   }
@@ -1580,7 +1784,8 @@ void ConvergingScheduler::balanceZones(
   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
       > (int)SchedModel->getLatencyFactor()) {
     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+    DEBUG(dbgs() << "  Balance " << OppositeZone.Available.getName()
+          << " demand "
           << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
           << '\n');
   }
@@ -1604,7 +1809,7 @@ void ConvergingScheduler::checkResourceLimits(
     if (Top.CritResIdx != Rem.CritResIdx) {
       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
-      DEBUG(dbgs() << "Reduce scheduled "
+      DEBUG(dbgs() << "  Reduce scheduled "
             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
     }
     return;
   }
@@ -1621,7 +1826,7 @@ void ConvergingScheduler::checkResourceLimits(
       && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
     TopCand.Policy.ReduceLatency = true;
     BotCand.Policy.ReduceLatency = true;
-    DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+    DEBUG(dbgs() << "  Reduce scheduled latency " << Top.ExpectedLatency
           << " + " << Bot.ExpectedLatency << '\n');
   }
   return;
@@ -1696,6 +1901,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
 }
 
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are
+/// root/leaf copies which can be prescheduled. The rest (e.g. x86 MUL) could
+/// be bundled with the operation that produces or consumes the physreg. We'll
+/// do this when regalloc has support for parallel copies.
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+  const MachineInstr *MI = SU->getInstr();
+  if (!MI->isCopy())
+    return 0;
+
+  unsigned ScheduledOper = isTop ? 1 : 0;
+  unsigned UnscheduledOper = isTop ? 0 : 1;
+  // If we have already scheduled the physreg producer/consumer, immediately
+  // schedule the copy.
+  if (TargetRegisterInfo::isPhysicalRegister(
+        MI->getOperand(ScheduledOper).getReg()))
+    return 1;
+  // If the physreg is at the boundary, defer it. Otherwise schedule it
+  // immediately to free the dependent. We can hoist the copy later.
+  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+  if (TargetRegisterInfo::isPhysicalRegister(
+        MI->getOperand(UnscheduledOper).getReg()))
+    return AtBoundary ? -1 : 1;
+  return 0;
+}
+
 /// Apply a set of heuristics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -1723,6 +1956,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
     TryCand.Reason = NodeOrder;
     return;
   }
+
+  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
+                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
+                 TryCand, Cand, PhysRegCopy))
+    return;
+
   // Avoid exceeding the target's limit.
   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
@@ -1749,12 +1988,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                  TryCand, Cand, Cluster))
     return;
-  // Currently, weak edges are for clustering, so we hard-code that reason.
-  // However, deferring the current TryCand will not change Cand's reason.
+
+  // Weak edges are for clustering and other constraints.
+  //
+  // Deferring TryCand here does not change Cand's reason. This is good in the
+  // sense that a bad candidate shouldn't affect a previous candidate's
+  // goodness, but bad in that it is asymmetric and depends on queue order.
   CandReason OrigReason = Cand.Reason;
   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
               getWeakLeft(Cand.SU, Zone.isTop()),
-              TryCand, Cand, Cluster)) {
+              TryCand, Cand, Weak)) {
     Cand.Reason = OrigReason;
     return;
   }
@@ -1825,20 +2068,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
 
   // Avoid increasing the max critical pressure in the scheduled region.
   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
-    DEBUG(dbgs() << "RP excess top - bot: "
+    DEBUG(dbgs() << "  RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
-    DEBUG(dbgs() << "RP critical top - bot: "
+    DEBUG(dbgs() << "  RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
-    DEBUG(dbgs() << "RP current top - bot: "
+    DEBUG(dbgs() << "  RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
@@ -1851,9 +2094,11 @@ const char *ConvergingScheduler::getReasonStr(
   ConvergingScheduler::CandReason Reason) {
   switch (Reason) {
   case NoCand:         return "NOCAND    ";
+  case PhysRegCopy:    return "PREG-COPY ";
   case SingleExcess:   return "REG-EXCESS";
   case SingleCritical: return "REG-CRIT  ";
   case Cluster:        return "CLUSTER   ";
+  case Weak:           return "WEAK      ";
   case SingleMax:      return "REG-MAX   ";
   case MultiPressure:  return "REG-MULTI ";
   case ResourceReduce: return "RES-REDUCE";
@@ -1953,8 +2198,7 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
 
 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                       bool IsTop) {
-  DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot")
-        << " SU(" << Cand.SU->NodeNum << ") "
+  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
 }
@@ -1964,10 +2208,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
   // efficient, but also provides the best heuristics for CriticalPSets.
   if (SUnit *SU = Bot.pickOnlyChoice()) {
     IsTopNode = false;
+    DEBUG(dbgs() << "Pick Bot NOCAND\n");
     return SU;
   }
   if (SUnit *SU = Top.pickOnlyChoice()) {
     IsTopNode = true;
+    DEBUG(dbgs() << "Pick Top NOCAND\n");
     return SU;
   }
   CandPolicy NoPolicy;
@@ -2065,21 +2311,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
   if (SU->isBottomReady())
     Bot.removeReady(SU);
 
-  DEBUG(dbgs() << "Scheduling " << *SU->getInstr());
+  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
   return SU;
 }
 
+void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+
+  MachineBasicBlock::iterator InsertPos = SU->getInstr();
+  if (!isTop)
+    ++InsertPos;
+  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+  // Find already scheduled copies with a single physreg dependence and move
+  // them just above the scheduled instruction.
+  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
+       I != E; ++I) {
+    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+      continue;
+    SUnit *DepSU = I->getSUnit();
+    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+      continue;
+    MachineInstr *Copy = DepSU->getInstr();
+    if (!Copy->isCopy())
+      continue;
+    DEBUG(dbgs() << "  Rescheduling physreg copy ";
+          I->getSUnit()->dump(DAG));
+    DAG->moveInstruction(Copy, InsertPos);
+  }
+}
+
 /// Update the scheduler's state after scheduling a node. This is the same node
 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
 /// its state based on the current cycle before MachineSchedStrategy does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = Top.CurrCycle;
     Top.bumpNode(SU);
+    if (SU->hasPhysRegUses)
+      reschedulePhysRegCopies(SU, true);
   }
   else {
     SU->BotReadyCycle = Bot.CurrCycle;
     Bot.bumpNode(SU);
+    if (SU->hasPhysRegDefs)
+      reschedulePhysRegCopies(SU, false);
   }
 }
@@ -2090,6 +2368,12 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
          "-misched-topdown incompatible with -misched-bottomup");
   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
   // Register DAG post-processors.
+  //
+  // FIXME: extend the mutation API to allow earlier mutations to instantiate
+  // data and pass it to later mutations. Have a single mutation that gathers
+  // the interesting nodes in one pass.
+  if (EnableCopyConstrain)
+    DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
   if (EnableLoadCluster)
     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
   if (EnableMacroFusion)
@@ -2179,12 +2463,12 @@ public:
     SUnit *SU = ReadyQ.back();
     ReadyQ.pop_back();
     IsTopNode = false;
-    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
-          << *SU->getInstr()
+    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
           << " ILP: " << DAG->getDFSResult()->getILP(SU)
           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
           << DAG->getDFSResult()->getSubtreeLevel(
-            DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
+            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
+          << "Scheduling " << *SU->getInstr());
     return SU;
   }
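
To make constrainLocalCopy() concrete, consider case 1 above ("local src"). The weak edges steer every use of the global vreg ahead of the local vreg's def, so the two live ranges never overlap and the register coalescer can later fold the COPY. A hypothetical pre-scheduling sequence (vreg names invented for illustration, not taken from the patch):

// Case 1 (local src): %dst is the global vreg, %src the local one.
//   I0:      = %dst           ; global use
//   I1: %src = ADD %a, %b     ; the sole local def, e.g. an IV increment
//   I2:      = %dst           ; another global use
//   I3: %dst = COPY %src
// Weak edges I0->I1 and I2->I1 bias the scheduler to place all reads of %dst
// before %src is defined. %dst then dies before %src is born, the two live
// ranges do not interfere, and regalloc can coalesce away the COPY at I3.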
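
CopyConstrain plugs into ScheduleDAGMI's post-processing hook: each registered ScheduleDAGMutation has its apply() run once per scheduling region, after the DAG is built but before any node is picked, which is why apply() can compute RegionBeginIdx and RegionEndIdx up front. Below is a minimal sketch of that hook against the LLVM 3.3-era API used here; TraceCopies is a hypothetical mutation, not part of this patch, and only the registration pattern mirrors createConvergingSched():

namespace {
// Hypothetical mutation: logs every COPY in the region. It demonstrates the
// per-region apply() hook that CopyConstrain uses, but mutates nothing.
class TraceCopies : public ScheduleDAGMutation {
public:
  virtual void apply(ScheduleDAGMI *DAG) {
    for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
      SUnit *SU = &DAG->SUnits[Idx];
      if (SU->getInstr()->isCopy())
        DEBUG(dbgs() << "region copy: SU(" << SU->NodeNum << ")\n");
    }
  }
};
} // anonymous

// Registration, following the pattern in createConvergingSched():
//   DAG->addMutation(new TraceCopies());

Because EnableCopyConstrain is a hidden cl::opt initialized to true, the new constraint is on by default and can be disabled for comparison, e.g. by passing -misched-vcopy=false to llc.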
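
The new Weak candidate reason builds on the bookkeeping weak edges already receive: a weak edge never gates readiness, it only decrements SU->WeakPredsLeft or SU->WeakSuccsLeft as neighbors are scheduled, and getWeakLeft() above exposes the remaining count to tryCandidate(). A hypothetical helper in the same style, shown only to illustrate what those counters mean (it is not part of the patch):

// Hypothetical: true once every weak constraint on SU in the current
// scheduling direction has been satisfied by already-scheduled neighbors.
static bool allWeakEdgesSatisfied(const SUnit *SU, bool isTop) {
  return (isTop ? SU->WeakPredsLeft : SU->WeakSuccsLeft) == 0;
}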