Diffstat (limited to 'contrib/llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 214
1 file changed, 129 insertions, 85 deletions
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index d921e29..e06bc51 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -230,11 +230,6 @@ static cl::opt<bool> EnablePostRAMachineSched(
     cl::desc("Enable the post-ra machine instruction scheduling pass."),
     cl::init(true), cl::Hidden);
 
-/// Forward declare the standard machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
-static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
-
 /// Decrement this iterator until reaching the top or a non-debug instr.
 static MachineBasicBlock::const_iterator
 priorNonDebug(MachineBasicBlock::const_iterator I,
@@ -251,8 +246,8 @@ priorNonDebug(MachineBasicBlock::const_iterator I,
 static MachineBasicBlock::iterator
 priorNonDebug(MachineBasicBlock::iterator I,
               MachineBasicBlock::const_iterator Beg) {
-  return const_cast<MachineInstr*>(
-      &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
+  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
+      .getNonConstIterator();
 }
 
 /// If this iterator is a debug value, increment until reaching the End or a
@@ -271,12 +266,8 @@ nextIfDebug(MachineBasicBlock::const_iterator I,
 static MachineBasicBlock::iterator
 nextIfDebug(MachineBasicBlock::iterator I,
             MachineBasicBlock::const_iterator End) {
-  // Cast the return value to nonconst MachineInstr, then cast to an
-  // instr_iterator, which does not check for null, finally return a
-  // bundle_iterator.
-  return MachineBasicBlock::instr_iterator(
-      const_cast<MachineInstr*>(
-          &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
+  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
+      .getNonConstIterator();
 }
 
 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
@@ -458,9 +449,10 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
       unsigned NumRegionInstrs = 0;
       MachineBasicBlock::iterator I = RegionEnd;
       for (;I != MBB->begin(); --I) {
-        if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
+        MachineInstr &MI = *std::prev(I);
+        if (isSchedBoundary(&MI, &*MBB, MF, TII))
           break;
-        if (!I->isDebugValue())
+        if (!MI.isDebugValue())
           ++NumRegionInstrs;
       }
       // Notify the scheduler of the region, even if we may skip scheduling
@@ -692,8 +684,14 @@ void ScheduleDAGMI::schedule() {
   // This may initialize a DFSResult to be used for queue priority.
   SchedImpl->initialize(this);
 
-  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
-          SUnits[su].dumpAll(this));
+  DEBUG(
+    if (EntrySU.getInstr() != nullptr)
+      EntrySU.dumpAll(this);
+    for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+      SUnits[su].dumpAll(this);
+    if (ExitSU.getInstr() != nullptr)
+      ExitSU.dumpAll(this);
+  );
   if (ViewMISchedDAGs) viewGraph();
 
   // Initialize ready queues now that the DAG and priority data are finalized.
@@ -862,6 +860,44 @@ ScheduleDAGMILive::~ScheduleDAGMILive() {
   delete DFSResult;
 }
 
+void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
+  const MachineInstr &MI = *SU.getInstr();
+  for (const MachineOperand &MO : MI.operands()) {
+    if (!MO.isReg())
+      continue;
+    if (!MO.readsReg())
+      continue;
+    if (TrackLaneMasks && !MO.isUse())
+      continue;
+
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+
+    // Ignore re-defs.
+    if (TrackLaneMasks) {
+      bool FoundDef = false;
+      for (const MachineOperand &MO2 : MI.operands()) {
+        if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
+          FoundDef = true;
+          break;
+        }
+      }
+      if (FoundDef)
+        continue;
+    }
+
+    // Record this local VReg use.
+    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
+    for (; UI != VRegUses.end(); ++UI) {
+      if (UI->SU == &SU)
+        break;
+    }
+    if (UI == VRegUses.end())
+      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
+  }
+}
+
 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 /// the region, including the boundary itself and single-instruction regions
@@ -889,6 +925,11 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
 // Setup the register pressure trackers for the top scheduled top and bottom
 // scheduled regions.
 void ScheduleDAGMILive::initRegPressure() {
+  VRegUses.clear();
+  VRegUses.setUniverse(MRI.getNumVirtRegs());
+  for (SUnit &SU : SUnits)
+    collectVRegUses(SU);
+
   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
                     ShouldTrackLaneMasks, false);
   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
@@ -999,7 +1040,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
       // this fact anymore => decrement pressure.
       // If the register has just become dead then other uses make it come
       // back to life => increment pressure.
-      bool Decrement = P.LaneMask != 0;
+      bool Decrement = P.LaneMask.any();
 
       for (const VReg2SUnit &V2SU
            : make_range(VRegUses.find(Reg), VRegUses.end())) {
@@ -1018,7 +1059,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
         );
       }
     } else {
-      assert(P.LaneMask != 0);
+      assert(P.LaneMask.any());
       DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
       // This may be called before CurrentBottom has been initialized. However,
       // BotRPTracker must have a valid position. We want the value live into the
@@ -1087,6 +1128,8 @@ void ScheduleDAGMILive::schedule() {
   SchedImpl->initialize(this);
 
   DEBUG(
+    if (EntrySU.getInstr() != nullptr)
+      EntrySU.dumpAll(this);
     for (const SUnit &SU : SUnits) {
       SU.dumpAll(this);
       if (ShouldTrackPressure) {
@@ -1095,6 +1138,8 @@
       }
       dbgs() << '\n';
     }
+    if (ExitSU.getInstr() != nullptr)
+      ExitSU.dumpAll(this);
   );
   if (ViewMISchedDAGs) viewGraph();
 
@@ -1362,7 +1407,8 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
           : SU(su), BaseReg(reg), Offset(ofs) {}
 
       bool operator<(const MemOpInfo&RHS) const {
-        return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
+        return std::tie(BaseReg, Offset, SU->NodeNum) <
+               std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
       }
     };
 
@@ -1395,6 +1441,24 @@ public:
 };
 } // anonymous
 
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createLoadClusterDAGMutation(const TargetInstrInfo *TII,
+                             const TargetRegisterInfo *TRI) {
+  return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
+                            : nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+createStoreClusterDAGMutation(const TargetInstrInfo *TII,
+                              const TargetRegisterInfo *TRI) {
+  return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
+                            : nullptr;
+}
+
+} // namespace llvm
+
 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
   SmallVector<MemOpInfo, 32> MemOpRecords;
@@ -1487,29 +1551,23 @@ namespace {
 /// that may be fused by the processor into a single operation.
 class MacroFusion : public ScheduleDAGMutation {
   const TargetInstrInfo &TII;
-  const TargetRegisterInfo &TRI;
 public:
-  MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
-    : TII(TII), TRI(TRI) {}
+  MacroFusion(const TargetInstrInfo &TII)
+    : TII(TII) {}
 
   void apply(ScheduleDAGInstrs *DAGInstrs) override;
 };
 } // anonymous
 
-/// Returns true if \p MI reads a register written by \p Other.
-static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
-                       const MachineInstr &Other) {
-  for (const MachineOperand &MO : MI.uses()) {
-    if (!MO.isReg() || !MO.readsReg())
-      continue;
+namespace llvm {
 
-    unsigned Reg = MO.getReg();
-    if (Other.modifiesRegister(Reg, &TRI))
-      return true;
-  }
-  return false;
+std::unique_ptr<ScheduleDAGMutation>
+createMacroFusionDAGMutation(const TargetInstrInfo *TII) {
+  return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
 }
 
+} // namespace llvm
+
 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
 /// fused operations.
 void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
@@ -1521,16 +1579,12 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
   if (!Branch)
     return;
 
-  for (SUnit &SU : DAG->SUnits) {
-    // SUnits with successors can't be schedule in front of the ExitSU.
-    if (!SU.Succs.empty())
-      continue;
-    // We only care if the node writes to a register that the branch reads.
-    MachineInstr *Pred = SU.getInstr();
-    if (!HasDataDep(TRI, *Branch, *Pred))
+  for (SDep &PredDep : ExitSU.Preds) {
+    if (PredDep.isWeak())
       continue;
-
-    if (!TII.shouldScheduleAdjacent(*Pred, *Branch))
+    SUnit &SU = *PredDep.getSUnit();
+    MachineInstr &Pred = *SU.getInstr();
+    if (!TII.shouldScheduleAdjacent(Pred, *Branch))
       continue;
 
     // Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1543,6 +1597,16 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
     (void)Success;
     assert(Success && "No DAG nodes should be reachable from ExitSU");
 
+    // Adjust latency of data deps between the nodes.
+    for (SDep &PredDep : ExitSU.Preds) {
+      if (PredDep.getSUnit() == &SU)
+        PredDep.setLatency(0);
+    }
+    for (SDep &SuccDep : SU.Succs) {
+      if (SuccDep.getSUnit() == &ExitSU)
+        SuccDep.setLatency(0);
+    }
+
     DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
     break;
   }
@@ -1572,6 +1636,16 @@ protected:
 };
 } // anonymous
 
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
+                               const TargetRegisterInfo *TRI) {
+  return make_unique<CopyConstrain>(TII, TRI);
+}
+
+} // namespace llvm
+
 /// constrainLocalCopy handles two possibilities:
 /// 1) Local src:
 /// I0:     = dst
@@ -1760,7 +1834,6 @@ void SchedBoundary::reset() {
   Available.clear();
   Pending.clear();
   CheckPending = false;
-  NextSUs.clear();
   CurrCycle = 0;
   CurrMOps = 0;
   MinReadyCycle = UINT_MAX;
@@ -1961,23 +2034,6 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
     Pending.push(SU);
   else
     Available.push(SU);
-
-  // Record this node as an immediate dependent of the scheduled node.
-  NextSUs.insert(SU);
-}
-
-void SchedBoundary::releaseTopNode(SUnit *SU) {
-  if (SU->isScheduled)
-    return;
-
-  releaseNode(SU, SU->TopReadyCycle);
-}
-
-void SchedBoundary::releaseBottomNode(SUnit *SU) {
-  if (SU->isScheduled)
-    return;
-
-  releaseNode(SU, SU->BotReadyCycle);
 }
 
 /// Move the boundary of scheduled code by one cycle.
@@ -2828,9 +2884,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
   bool SameBoundary = Zone != nullptr;
   if (SameBoundary) {
     // For loops that are acyclic path limited, aggressively schedule for
-    // latency. This can result in very long dependence chains scheduled in
-    // sequence, so once every cycle (when CurrMOps == 0), switch to normal
-    // heuristics.
+    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
+    // heuristics to take precedence.
     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
         tryLatency(TryCand, Cand, *Zone))
       return;
@@ -2888,13 +2943,6 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
       return;
 
-    // Prefer immediate defs/users of the last scheduled instruction. This is a
-    // local pressure avoidance strategy that also makes the machine code
-    // readable.
-    if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU),
-                   TryCand, Cand, NextDefUse))
-      return;
-
     // Fall through to original instruction order.
     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
@@ -3105,28 +3153,24 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
 
 /// Create the standard converging machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
   ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
   // Register DAG post-processors.
   //
   // FIXME: extend the mutation API to allow earlier mutations to instantiate
   // data and pass it to later mutations. Have a single mutation that gathers
   // the interesting nodes in one pass.
-  DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
-  if (EnableMemOpCluster) {
-    if (DAG->TII->enableClusterLoads())
-      DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
-    if (DAG->TII->enableClusterStores())
-      DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
-  }
-  if (EnableMacroFusion)
-    DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
+  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
   return DAG;
 }
 
+static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
+  return createGenericSchedLive(C);
+}
+
 static MachineSchedRegistry
 GenericSchedRegistry("converge", "Standard converging scheduler.",
-                     createGenericSchedLive);
+                     createConveringSched);
 
 //===----------------------------------------------------------------------===//
 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
@@ -3257,9 +3301,9 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   Top.bumpNode(SU);
 }
 
-/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
-static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
-  return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
+ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
+                           /*RemoveKillFlags=*/true);
 }
 
 //===----------------------------------------------------------------------===//
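
Note on the API change in this diff: createGenericSchedLive, createGenericSchedPostRA, and the mutation factories (createLoadClusterDAGMutation, createStoreClusterDAGMutation, createMacroFusionDAGMutation, createCopyConstrainDAGMutation) are now exported from the llvm namespace, so a target can assemble its own scheduler rather than relying on the static defaults. Below is a minimal sketch of how a target's createMachineScheduler() hook might use them; the helper name is hypothetical, and the null checks are deliberate because each factory returns nullptr when its cl::opt is disabled.

// Hypothetical target-side wiring -- illustrative, not part of this commit.
#include "llvm/CodeGen/MachineScheduler.h"
#include <utility>
using namespace llvm;

static ScheduleDAGInstrs *createMyTargetScheduler(MachineSchedContext *C) {
  // Start from the standard live-interval scheduler this patch exports.
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // Each factory returns nullptr when its feature is disabled, so guard
  // before handing the mutation to the DAG.
  if (auto Loads = createLoadClusterDAGMutation(DAG->TII, DAG->TRI))
    DAG->addMutation(std::move(Loads));
  if (auto Stores = createStoreClusterDAGMutation(DAG->TII, DAG->TRI))
    DAG->addMutation(std::move(Stores));
  if (auto Fusion = createMacroFusionDAGMutation(DAG->TII))
    DAG->addMutation(std::move(Fusion));
  return DAG;
}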
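A second note, on the SU->NodeNum tie-breaker added to MemOpInfo::operator<: std::sort is not a stable sort, so two records with identical (BaseReg, Offset) could previously be permuted differently from run to run, producing nondeterministic cluster edges. Including the node number makes the comparison a total order over distinct DAG nodes. A standalone sketch of the idea, with made-up values and a plain unsigned standing in for the SUnit:

// Standalone illustration -- field names mirror the patch, values are made up.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <tuple>
#include <vector>

struct MemOpInfo {
  unsigned NodeNum; // stand-in for SU->NodeNum, unique per DAG node
  unsigned BaseReg;
  int64_t Offset;
  bool operator<(const MemOpInfo &RHS) const {
    // Without NodeNum, the two (BaseReg=5, Offset=8) records compare
    // equivalent and an unstable sort may order them arbitrarily.
    return std::tie(BaseReg, Offset, NodeNum) <
           std::tie(RHS.BaseReg, RHS.Offset, RHS.NodeNum);
  }
};

int main() {
  std::vector<MemOpInfo> Ops = {{2, 5, 8}, {0, 5, 0}, {1, 5, 8}};
  std::sort(Ops.begin(), Ops.end());
  // The tie-breaker guarantees the same order on every run and platform.
  assert(Ops[0].NodeNum == 0 && Ops[1].NodeNum == 1 && Ops[2].NodeNum == 2);
  return 0;
}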