Diffstat (limited to 'contrib/llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 736
1 file changed, 438 insertions(+), 298 deletions(-)
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index bcee15c..d921e29 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -23,13 +23,13 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include <queue>
using namespace llvm;
@@ -65,14 +65,20 @@ static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
+/// Avoid quadratic complexity in unusually large basic blocks by limiting the
+/// size of the ready lists.
+static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
+ cl::desc("Limit ready list to N instructions"), cl::init(256));
+
static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
cl::desc("Enable register pressure scheduling."), cl::init(true));
static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
cl::desc("Enable cyclic critical path analysis."), cl::init(true));
-static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
- cl::desc("Enable load clustering."), cl::init(true));
+static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable memop clustering."),
+ cl::init(true));
// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
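A minimal sketch of the gating pattern that ReadyListLimit feeds, using simplified stand-in types rather than the scheduler's real ones: once the ready list is full, newly released nodes are parked on a pending list, which bounds the cost of scanning the ready list on every pick.

#include <vector>

static const unsigned ReadyListLimit = 256; // mirrors the cl::opt default

struct Node { bool Hazard; };

void releaseNode(Node *N, std::vector<Node *> &Available,
                 std::vector<Node *> &Pending) {
  // Hazard or overflow: defer to Pending rather than Available, matching
  // the checks this patch adds in SchedBoundary; a later releasePending()
  // promotes deferred nodes once slots free up.
  if (N->Hazard || Available.size() >= ReadyListLimit)
    Pending.push_back(N);
  else
    Available.push_back(N);
}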
@@ -219,6 +225,11 @@ static cl::opt<bool> EnableMachineSched(
cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
cl::Hidden);
+static cl::opt<bool> EnablePostRAMachineSched(
+ "enable-post-misched",
+ cl::desc("Enable the post-ra machine instruction scheduling pass."),
+ cl::init(true), cl::Hidden);
+
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
@@ -314,6 +325,9 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
+
if (EnableMachineSched.getNumOccurrences()) {
if (!EnableMachineSched)
return false;
@@ -349,10 +363,13 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
- if (skipOptnoneFunction(*mf.getFunction()))
+ if (skipFunction(*mf.getFunction()))
return false;
- if (!mf.getSubtarget().enablePostRAScheduler()) {
+ if (EnablePostRAMachineSched.getNumOccurrences()) {
+ if (!EnablePostRAMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enablePostRAScheduler()) {
DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
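The new control flow gives an explicit command-line setting, in either polarity, priority over the subtarget hook; the target is consulted only when the flag was not passed at all. A standalone sketch of that pattern with a hypothetical option name:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool> EnableFeature("enable-feature", cl::Hidden,
                                   cl::init(true));

static bool featureEnabled(bool SubtargetDefault) {
  if (EnableFeature.getNumOccurrences()) // flag given explicitly on cmdline
    return EnableFeature;                // honor it, whether true or false
  return SubtargetDefault;               // otherwise defer to the target
}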
@@ -389,7 +406,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB,
MachineFunction *MF,
const TargetInstrInfo *TII) {
- return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
+ return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}
/// Main driver for both MachineScheduler and PostMachineScheduler.
@@ -427,7 +444,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
//
// MBB::size() uses instr_iterator to count. Here we need a bundle to count
// as a single instruction.
- unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
@@ -435,15 +451,13 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
if (RegionEnd != MBB->end() ||
isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
--RegionEnd;
- // Count the boundary instruction.
- --RemainingInstrs;
}
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingInstrs) {
+ for (;I != MBB->begin(); --I) {
if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
break;
if (!I->isDebugValue())
@@ -466,8 +480,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
<< "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
- dbgs() << " RegionInstrs: " << NumRegionInstrs
- << " Remaining: " << RemainingInstrs << "\n");
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
if (DumpCriticalPathLength) {
errs() << MF->getName();
errs() << ":BB# " << MBB->getNumber();
@@ -485,7 +498,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
// scheduler for the top of its scheduled region.
RegionEnd = Scheduler.begin();
}
- assert(RemainingInstrs == 0 && "Instruction count mismatch!");
Scheduler.finishBlock();
// FIXME: Ideally, no further passes should rely on kill flags. However,
// thumb2 size reduction is currently an exception, so the PostMIScheduler
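The surrounding loop carves each block into scheduling regions bottom-up, breaking at boundary instructions and counting only non-debug instructions; the patch drops the redundant RemainingInstrs bookkeeping. A toy version of that walk, with simplified stand-in types:

#include <cstdio>
#include <vector>

int main() {
  struct Instr { bool Boundary, DebugValue; };
  // Four instructions with one boundary (e.g. a call) in the middle.
  std::vector<Instr> MBB = {
      {false, false}, {true, false}, {false, true}, {false, false}};
  unsigned NumRegionInstrs = 0;
  for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
    if (I->Boundary) { // region ends; the boundary itself is not scheduled
      std::printf("region of %u instrs\n", NumRegionInstrs);
      NumRegionInstrs = 0;
      continue;
    }
    if (!I->DebugValue) // debug values never count toward region size
      ++NumRegionInstrs;
  }
  std::printf("topmost region of %u instrs\n", NumRegionInstrs);
}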
@@ -640,7 +652,7 @@ void ScheduleDAGMI::moveInstruction(
// Update LiveIntervals
if (LIS)
- LIS->handleMove(MI, /*UpdateFlags=*/true);
+ LIS->handleMove(*MI, /*UpdateFlags=*/true);
// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
@@ -704,8 +716,7 @@ void ScheduleDAGMI::schedule() {
CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
else
moveInstruction(MI, CurrentTop);
- }
- else {
+ } else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
MachineBasicBlock::iterator priorII =
priorNonDebug(CurrentBottom, CurrentTop);
@@ -869,13 +880,19 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
SUPressureDiffs.clear();
ShouldTrackPressure = SchedImpl->shouldTrackPressure();
+ ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
+
+ assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
+ "ShouldTrackLaneMasks requires ShouldTrackPressure");
}
// Setup the register pressure trackers for the top scheduled top and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
- TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
- BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
+ ShouldTrackLaneMasks, false);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+ ShouldTrackLaneMasks, false);
// Close the RPTracker to finalize live ins.
RPTracker.closeRegion();
@@ -905,7 +922,7 @@ void ScheduleDAGMILive::initRegPressure() {
// Account for liveness generated by the region boundary.
if (LiveRegionEnd != RegionEnd) {
- SmallVector<unsigned, 8> LiveUses;
+ SmallVector<RegisterMaskPair, 8> LiveUses;
BotRPTracker.recede(&LiveUses);
updatePressureDiffs(LiveUses);
}
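The LiveUses elements change from bare register numbers to register/lane-mask pairs so the tracker can say which lanes went live or dead. The pair type comes from RegisterPressure.h and looks approximately like the sketch below; the field details here are a from-memory approximation, not a verbatim copy.

typedef unsigned LaneBitmask; // an integer lane bitmask in this LLVM version

struct RegisterMaskPair {
  unsigned RegUnit;     // virtual register or register unit
  LaneBitmask LaneMask; // which lanes of RegUnit this entry covers
};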
@@ -969,47 +986,74 @@ updateScheduledPressure(const SUnit *SU,
/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
-void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
- for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
+void ScheduleDAGMILive::updatePressureDiffs(
+ ArrayRef<RegisterMaskPair> LiveUses) {
+ for (const RegisterMaskPair &P : LiveUses) {
+ unsigned Reg = P.RegUnit;
/// FIXME: Currently assuming single-use physregs.
- unsigned Reg = LiveUses[LUIdx];
- DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
if (!TRI->isVirtualRegister(Reg))
continue;
- // This may be called before CurrentBottom has been initialized. However,
- // BotRPTracker must have a valid position. We want the value live into the
- // instruction or live out of the block, so ask for the previous
- // instruction's live-out.
- const LiveInterval &LI = LIS->getInterval(Reg);
- VNInfo *VNI;
- MachineBasicBlock::const_iterator I =
- nextIfDebug(BotRPTracker.getPos(), BB->end());
- if (I == BB->end())
- VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
- else {
- LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
- VNI = LRQ.valueIn();
- }
- // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
- assert(VNI && "No live value at use.");
- for (const VReg2SUnit &V2SU
- : make_range(VRegUses.find(Reg), VRegUses.end())) {
- SUnit *SU = V2SU.SU;
- // If this use comes before the reaching def, it cannot be a last use, so
- // descrease its pressure change.
- if (!SU->isScheduled && SU != &ExitSU) {
- LiveQueryResult LRQ
- = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
- if (LRQ.valueIn() == VNI) {
- PressureDiff &PDiff = getPressureDiff(SU);
- PDiff.addPressureChange(Reg, true, &MRI);
- DEBUG(
- dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
- << *SU->getInstr();
- dbgs() << " to ";
- PDiff.dump(*TRI);
- );
+ if (ShouldTrackLaneMasks) {
+ // If the register has just become live then other uses won't change
+ // this fact anymore => decrement pressure.
+ // If the register has just become dead then other uses make it come
+ // back to life => increment pressure.
+ bool Decrement = P.LaneMask != 0;
+
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit &SU = *V2SU.SU;
+ if (SU.isScheduled || &SU == &ExitSU)
+ continue;
+
+ PressureDiff &PDiff = getPressureDiff(&SU);
+ PDiff.addPressureChange(Reg, Decrement, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
+ << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
+ << ' ' << *SU.getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
+ } else {
+ assert(P.LaneMask != 0);
+ DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
+ // This may be called before CurrentBottom has been initialized. However,
+ // BotRPTracker must have a valid position. We want the value live into the
+ // instruction or live out of the block, so ask for the previous
+ // instruction's live-out.
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI;
+ MachineBasicBlock::const_iterator I =
+ nextIfDebug(BotRPTracker.getPos(), BB->end());
+ if (I == BB->end())
+ VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ else {
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
+ VNI = LRQ.valueIn();
+ }
+ // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
+ assert(VNI && "No live value at use.");
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ // If this use comes before the reaching def, it cannot be a last use,
+ // so decrease its pressure change.
+ if (!SU->isScheduled && SU != &ExitSU) {
+ LiveQueryResult LRQ =
+ LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
+ if (LRQ.valueIn() == VNI) {
+ PressureDiff &PDiff = getPressureDiff(SU);
+ PDiff.addPressureChange(Reg, true, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
}
}
}
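A toy walk-through of the lane-mask branch above: when a recede step reports a nonzero lane mask, the register just became live, so no remaining use can be a last use and each unscheduled user's pressure diff is decremented; a zero mask means it just went dead, so remaining uses would revive it and the diffs are incremented instead.

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> UnscheduledUserSUs = {3, 7}; // users of some vreg %v
  unsigned LaneMask = 0x3;                      // nonzero: %v became live
  bool Decrement = LaneMask != 0;
  for (int SUNum : UnscheduledUserSUs)
    std::printf("SU(%d): pressure %s for %%v\n", SUNum,
                Decrement ? "decrement" : "increment");
}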
@@ -1057,11 +1101,6 @@ void ScheduleDAGMILive::schedule() {
// Initialize ready queues now that the DAG and priority data are finalized.
initQueues(TopRoots, BotRoots);
- if (ShouldTrackPressure) {
- assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
- TopRPTracker.setPos(CurrentTop);
- }
-
bool IsTopNode = false;
while (true) {
DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
@@ -1111,14 +1150,14 @@ void ScheduleDAGMILive::buildDAGWithRegPressure() {
// Initialize the register pressure tracker used by buildSchedGraph.
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
- /*TrackUntiedDefs=*/true);
+ ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
// Account for liveness generated by the region boundary.
if (LiveRegionEnd != RegionEnd)
RPTracker.recede();
// Build the DAG, and compute current register pressure.
- buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
+ buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
// Initialize top/bottom trackers after computing region pressure.
initRegPressure();
@@ -1167,10 +1206,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
unsigned MaxCyclicLatency = 0;
// Visit each live out vreg def to find def/use pairs that cross iterations.
- ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
- for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
- RI != RE; ++RI) {
- unsigned Reg = *RI;
+ for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
+ unsigned Reg = P.RegUnit;
if (!TRI->isVirtualRegister(Reg))
continue;
const LiveInterval &LI = LIS->getInterval(Reg);
@@ -1193,8 +1230,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
continue;
// Only consider uses of the phi.
- LiveQueryResult LRQ =
- LI.Query(LIS->getInstructionIndex(SU->getInstr()));
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
if (!LRQ.valueIn()->isPHIDef())
continue;
@@ -1209,8 +1245,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
if (LiveInHeight > LiveOutHeight) {
if (LiveInHeight - LiveOutHeight < CyclicLatency)
CyclicLatency = LiveInHeight - LiveOutHeight;
- }
- else
+ } else
CyclicLatency = 0;
DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
@@ -1223,6 +1258,17 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
return MaxCyclicLatency;
}
+/// Release ExitSU predecessors and setup scheduler queues. Re-position
+/// the Top RP tracker in case the region beginning has changed.
+void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ ScheduleDAGMI::initQueues(TopRoots, BotRoots);
+ if (ShouldTrackPressure) {
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ TopRPTracker.setPos(CurrentTop);
+ }
+}
+
/// Move an instruction and update register pressure.
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
// Move the instruction to its new location in the instruction stream.
@@ -1239,7 +1285,18 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
if (ShouldTrackPressure) {
// Update top scheduled pressure.
- TopRPTracker.advance();
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ TopRPTracker.advance(RegOpers);
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
DEBUG(
dbgs() << "Top Pressure:\n";
@@ -1248,8 +1305,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
}
- }
- else {
+ } else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
MachineBasicBlock::iterator priorII =
priorNonDebug(CurrentBottom, CurrentTop);
@@ -1264,9 +1320,20 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
CurrentBottom = MI;
}
if (ShouldTrackPressure) {
- // Update bottom scheduled pressure.
- SmallVector<unsigned, 8> LiveUses;
- BotRPTracker.recede(&LiveUses);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ BotRPTracker.recedeSkipDebugValues();
+ SmallVector<RegisterMaskPair, 8> LiveUses;
+ BotRPTracker.recede(RegOpers, &LiveUses);
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
DEBUG(
dbgs() << "Bottom Pressure:\n";
@@ -1280,64 +1347,81 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
}
//===----------------------------------------------------------------------===//
-// LoadClusterMutation - DAG post-processing to cluster loads.
+// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
//===----------------------------------------------------------------------===//
namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
-/// loads.
-class LoadClusterMutation : public ScheduleDAGMutation {
- struct LoadInfo {
+/// loads or between neighboring stores.
+class BaseMemOpClusterMutation : public ScheduleDAGMutation {
+ struct MemOpInfo {
SUnit *SU;
unsigned BaseReg;
- unsigned Offset;
- LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
- : SU(su), BaseReg(reg), Offset(ofs) {}
+ int64_t Offset;
+ MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
- bool operator<(const LoadInfo &RHS) const {
+ bool operator<(const MemOpInfo &RHS) const {
return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
}
};
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ bool IsLoad;
+
public:
- LoadClusterMutation(const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri)
- : TII(tii), TRI(tri) {}
+ BaseMemOpClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, bool IsLoad)
+ : TII(tii), TRI(tri), IsLoad(IsLoad) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
- void apply(ScheduleDAGMI *DAG) override;
protected:
- void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+ void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+};
+
+class StoreClusterMutation : public BaseMemOpClusterMutation {
+public:
+ StoreClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, false) {}
+};
+
+class LoadClusterMutation : public BaseMemOpClusterMutation {
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, true) {}
};
} // anonymous
-void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
- ScheduleDAGMI *DAG) {
- SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
- for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
- SUnit *SU = Loads[Idx];
+void BaseMemOpClusterMutation::clusterNeighboringMemOps(
+ ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+ SmallVector<MemOpInfo, 32> MemOpRecords;
+ for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
+ SUnit *SU = MemOps[Idx];
unsigned BaseReg;
- unsigned Offset;
- if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
- LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ int64_t Offset;
+ if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
}
- if (LoadRecords.size() < 2)
+ if (MemOpRecords.size() < 2)
return;
- std::sort(LoadRecords.begin(), LoadRecords.end());
+
+ std::sort(MemOpRecords.begin(), MemOpRecords.end());
unsigned ClusterLength = 1;
- for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
- if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
+ if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
ClusterLength = 1;
continue;
}
- SUnit *SUa = LoadRecords[Idx].SU;
- SUnit *SUb = LoadRecords[Idx+1].SU;
- if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
- && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
-
- DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ SUnit *SUa = MemOpRecords[Idx].SU;
+ SUnit *SUb = MemOpRecords[Idx+1].SU;
+ if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
+ ClusterLength) &&
+ DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+ DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
<< SUb->NodeNum << ")\n");
// Copy successor edges from SUa to SUb. Interleaving computation
// dependent on SUa can prevent load combining due to register reuse.
@@ -1351,22 +1435,26 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
}
++ClusterLength;
- }
- else
+ } else
ClusterLength = 1;
}
}
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
-void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
// Map DAG NodeNum to store chain ID.
DenseMap<unsigned, unsigned> StoreChainIDs;
- // Map each store chain to a set of dependent loads.
+ // Map each store chain to a set of dependent MemOps.
SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
SUnit *SU = &DAG->SUnits[Idx];
- if (!SU->getInstr()->mayLoad())
+ if ((IsLoad && !SU->getInstr()->mayLoad()) ||
+ (!IsLoad && !SU->getInstr()->mayStore()))
continue;
+
unsigned ChainPredID = DAG->SUnits.size();
for (SUnit::const_pred_iterator
PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
@@ -1376,7 +1464,7 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
}
}
// Check if this chain-like pred has been seen
- // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+ // before. ChainPredID==MaxNodeID at the top of the schedule.
unsigned NumChains = StoreChainDependents.size();
std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
@@ -1384,9 +1472,10 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
StoreChainDependents.resize(NumChains + 1);
StoreChainDependents[Result.first->second].push_back(SU);
}
+
// Iterate over the store chains.
for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
- clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+ clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
}
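clusterNeighboringMemOps orders the candidate memory operations by (BaseReg, Offset) via std::tie, so operations on the same base register end up adjacent and sorted by offset, and cluster edges are proposed between neighbors. A self-contained toy of that ordering step:

#include <algorithm>
#include <cstdio>
#include <tuple>
#include <vector>

struct MemOpInfo {
  int Id;           // stand-in for the SUnit
  unsigned BaseReg; // base address register of the load/store
  int64_t Offset;   // immediate offset from that base
  bool operator<(const MemOpInfo &RHS) const {
    return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
  }
};

int main() {
  std::vector<MemOpInfo> Ops = {{0, 5, 16}, {1, 7, 0}, {2, 5, 8}, {3, 5, 24}};
  std::sort(Ops.begin(), Ops.end());
  for (size_t I = 0; I + 1 < Ops.size(); ++I)
    if (Ops[I].BaseReg == Ops[I + 1].BaseReg) // same base: try to cluster
      std::printf("cluster candidates: %d -> %d\n", Ops[I].Id, Ops[I + 1].Id);
}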
//===----------------------------------------------------------------------===//
@@ -1403,7 +1492,7 @@ public:
MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
: TII(TII), TRI(TRI) {}
- void apply(ScheduleDAGMI *DAG) override;
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
} // anonymous
@@ -1423,7 +1512,9 @@ static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
-void MacroFusion::apply(ScheduleDAGMI *DAG) {
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
// For now, assume targets can only fuse with the branch.
SUnit &ExitSU = DAG->ExitSU;
MachineInstr *Branch = ExitSU.getInstr();
@@ -1439,7 +1530,7 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
if (!HasDataDep(TRI, *Branch, *Pred))
continue;
- if (!TII.shouldScheduleAdjacent(Pred, Branch))
+ if (!TII.shouldScheduleAdjacent(*Pred, *Branch))
continue;
// Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1474,7 +1565,7 @@ class CopyConstrain : public ScheduleDAGMutation {
public:
CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
- void apply(ScheduleDAGMI *DAG) override;
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
protected:
void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
@@ -1505,12 +1596,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
MachineInstr *Copy = CopySU->getInstr();
// Check for pure vreg copies.
- unsigned SrcReg = Copy->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ const MachineOperand &SrcOp = Copy->getOperand(1);
+ unsigned SrcReg = SrcOp.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
return;
- unsigned DstReg = Copy->getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ const MachineOperand &DstOp = Copy->getOperand(0);
+ unsigned DstReg = DstOp.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
return;
// Check if either the dest or source is local. If it's live across a back
@@ -1627,15 +1720,16 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
/// \brief Callback from DAG postProcessing to create weak edges to encourage
/// copy elimination.
-void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
if (FirstPos == DAG->end())
return;
- RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+ RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
RegionEndIdx = DAG->getLIS()->getInstructionIndex(
- &*priorNonDebug(DAG->end(), DAG->begin()));
+ *priorNonDebug(DAG->end(), DAG->begin()));
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
SUnit *SU = &DAG->SUnits[Idx];
@@ -1862,7 +1956,8 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
// Check for interlocks first. For the purpose of other heuristics, an
// instruction that cannot issue appears as if it's not in the ReadyQueue.
bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
- if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
+ if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
+ Available.size() >= ReadyListLimit)
Pending.push(SU);
else
Available.push(SU);
@@ -1905,8 +2000,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
if (!HazardRec->isEnabled()) {
// Bypass HazardRec virtual calls.
CurrCycle = NextCycle;
- }
- else {
+ } else {
// Bypass getHazardType calls in case of long latency.
for (; CurrCycle != NextCycle; ++CurrCycle) {
if (isTop())
@@ -2074,8 +2168,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// If we stall for any reason, bump the cycle.
if (NextCycle > CurrCycle) {
bumpCycle(NextCycle);
- }
- else {
+ } else {
// After updating ZoneCritResIdx and ExpectedLatency, check if we're
// resource limited. If a stall occurred, bumpCycle does this.
unsigned LFactor = SchedModel->getLatencyFactor();
@@ -2119,11 +2212,13 @@ void SchedBoundary::releasePending() {
if (checkHazard(SU))
continue;
+ if (Available.size() >= ReadyListLimit)
+ break;
+
Available.push(SU);
Pending.remove(Pending.begin()+i);
--i; --e;
}
- DEBUG(if (!Pending.empty()) Pending.dump());
CheckPending = false;
}
@@ -2163,6 +2258,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
bumpCycle(CurrCycle + 1);
releasePending();
}
+
+ DEBUG(Pending.dump());
+ DEBUG(Available.dump());
+
if (Available.size() == 1)
return *Available.begin();
return nullptr;
@@ -2177,8 +2276,7 @@ void SchedBoundary::dumpScheduledState() {
if (ZoneCritResIdx) {
ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
ResCount = getResourceCount(ZoneCritResIdx);
- }
- else {
+ } else {
ResFactor = SchedModel->getMicroOpFactor();
ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
}
@@ -2218,8 +2316,7 @@ initResourceDelta(const ScheduleDAGMI *DAG,
/// Set the CandPolicy given a scheduling zone given the current resources and
/// latencies inside and outside the zone.
-void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
- bool IsPostRA,
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
SchedBoundary &CurrZone,
SchedBoundary *OtherZone) {
// Apply preemptive heuristics based on the total latency and resources
@@ -2295,7 +2392,8 @@ const char *GenericSchedulerBase::getReasonStr(
GenericSchedulerBase::CandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND ";
- case PhysRegCopy: return "PREG-COPY";
+ case Only1: return "ONLY1 ";
+ case PhysRegCopy: return "PREG-COPY ";
case RegExcess: return "REG-EXCESS";
case RegCritical: return "REG-CRIT ";
case Stall: return "STALL ";
@@ -2381,7 +2479,6 @@ static bool tryLess(int TryVal, int CandVal,
Cand.Reason = Reason;
return true;
}
- Cand.setRepeat(Reason);
return false;
}
@@ -2398,7 +2495,6 @@ static bool tryGreater(int TryVal, int CandVal,
Cand.Reason = Reason;
return true;
}
- Cand.setRepeat(Reason);
return false;
}
@@ -2414,8 +2510,7 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, GenericSchedulerBase::TopPathReduce))
return true;
- }
- else {
+ } else {
if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
@@ -2428,10 +2523,13 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
return false;
}
-static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
- bool IsTop) {
+static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
- << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
+ << GenericSchedulerBase::getReasonStr(Reason) << '\n');
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
+ tracePick(Cand.Reason, Cand.AtTop);
}
void GenericScheduler::initialize(ScheduleDAGMI *dag) {
@@ -2460,6 +2558,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
Itin, DAG);
}
+ TopCand.SU = nullptr;
+ BotCand.SU = nullptr;
}
/// Initialize the per-region scheduling policy.
@@ -2487,8 +2587,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
RegionPolicy.OnlyBottomUp = true;
// Allow the subtarget to override default policy.
- MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
- NumRegionInstrs);
+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
// After subtarget overrides, apply command line options.
if (!EnableRegPressure)
@@ -2582,19 +2681,25 @@ static bool tryPressure(const PressureChange &TryP,
GenericSchedulerBase::CandReason Reason,
const TargetRegisterInfo *TRI,
const MachineFunction &MF) {
- unsigned TryPSet = TryP.getPSetOrMax();
- unsigned CandPSet = CandP.getPSetOrMax();
- // If both candidates affect the same set, go with the smallest increase.
- if (TryPSet == CandPSet) {
- return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
- Reason);
- }
// If one candidate decreases and the other increases, go with it.
// Invalid candidates have UnitInc==0.
if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
Reason)) {
return true;
}
+ // Do not compare the magnitude of pressure changes between the top and
+ // bottom boundaries.
+ if (Cand.AtTop != TryCand.AtTop)
+ return false;
+
+ // If both candidates affect the same set in the same boundary, go with the
+ // smallest increase.
+ unsigned TryPSet = TryP.getPSetOrMax();
+ unsigned CandPSet = CandP.getPSetOrMax();
+ if (TryPSet == CandPSet) {
+ return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
+ Reason);
+ }
int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
std::numeric_limits<int>::max();
@@ -2640,64 +2745,64 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
return 0;
}
-/// Apply a set of heuristics to a new candidate. Heuristics are currently
-/// hierarchical. This may be more efficient than a graduated cost model because
-/// we don't need to evaluate all aspects of the model for each node in the
-/// queue. But it's really done to make the heuristics easier to debug and
-/// statistically analyze.
-///
-/// \param Cand provides the policy and current best candidate.
-/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
-/// \param Zone describes the scheduled zone that we are extending.
-/// \param RPTracker describes reg pressure within the scheduled zone.
-/// \param TempTracker is a scratch pressure tracker to reuse in queries.
-void GenericScheduler::tryCandidate(SchedCandidate &Cand,
- SchedCandidate &TryCand,
- SchedBoundary &Zone,
- const RegPressureTracker &RPTracker,
- RegPressureTracker &TempTracker) {
-
+void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+ Cand.SU = SU;
+ Cand.AtTop = AtTop;
if (DAG->isTrackingPressure()) {
- // Always initialize TryCand's RPDelta.
- if (Zone.isTop()) {
+ if (AtTop) {
TempTracker.getMaxDownwardPressureDelta(
- TryCand.SU->getInstr(),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
- }
- else {
+ } else {
if (VerifyScheduling) {
TempTracker.getMaxUpwardPressureDelta(
- TryCand.SU->getInstr(),
- &DAG->getPressureDiff(TryCand.SU),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ &DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
- }
- else {
+ } else {
RPTracker.getUpwardPressureDelta(
- TryCand.SU->getInstr(),
- DAG->getPressureDiff(TryCand.SU),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
}
}
}
- DEBUG(if (TryCand.RPDelta.Excess.isValid())
- dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") "
- << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
- << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
+ DEBUG(if (Cand.RPDelta.Excess.isValid())
+ dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
+ << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
+}
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+/// if Cand is from a different zone than TryCand.
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
return;
}
- if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
- biasPhysRegCopy(Cand.SU, Zone.isTop()),
+ if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
+ biasPhysRegCopy(Cand.SU, Cand.AtTop),
TryCand, Cand, PhysRegCopy))
return;
@@ -2715,17 +2820,26 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
DAG->MF))
return;
- // For loops that are acyclic path limited, aggressively schedule for latency.
- // This can result in very long dependence chains scheduled in sequence, so
- // once every cycle (when CurrMOps == 0), switch to normal heuristics.
- if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
- && tryLatency(TryCand, Cand, Zone))
- return;
+ // We only compare a subset of features when comparing nodes between the
+ // Top and Bottom boundaries. Some properties are simply incomparable; in
+ // many other instances we should only override the other boundary if
+ // something is a clearly good pick on one boundary. Skip heuristics that
+ // are more "tie-breaking" in nature.
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ // For loops that are acyclic path limited, aggressively schedule for
+ // latency. This can result in very long dependence chains scheduled in
+ // sequence, so once every cycle (when CurrMOps == 0), switch to normal
+ // heuristics.
+ if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+ tryLatency(TryCand, Cand, *Zone))
+ return;
- // Prioritize instructions that read unbuffered resources by stall cycles.
- if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
- Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
- return;
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+ Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
+ }
// Keep clustered nodes together to encourage downstream peephole
// optimizations which may reduce resource requirements.
@@ -2733,18 +2847,23 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const SUnit *NextClusterSU =
- Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
- if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ const SUnit *CandNextClusterSU =
+ Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ const SUnit *TryCandNextClusterSU =
+ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+ Cand.SU == CandNextClusterSU,
TryCand, Cand, Cluster))
return;
- // Weak edges are for clustering and other constraints.
- if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
- getWeakLeft(Cand.SU, Zone.isTop()),
- TryCand, Cand, Weak)) {
- return;
+ if (SameBoundary) {
+ // Weak edges are for clustering and other constraints.
+ if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+ getWeakLeft(Cand.SU, Cand.AtTop),
+ TryCand, Cand, Weak))
+ return;
}
+
// Avoid increasing the max pressure of the entire region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
Cand.RPDelta.CurrentMax,
@@ -2752,34 +2871,35 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
DAG->MF))
return;
- // Avoid critical resource consumption and balance the schedule.
- TryCand.initResourceDelta(DAG, SchedModel);
- if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
- TryCand, Cand, ResourceReduce))
- return;
- if (tryGreater(TryCand.ResDelta.DemandedResources,
- Cand.ResDelta.DemandedResources,
- TryCand, Cand, ResourceDemand))
- return;
+ if (SameBoundary) {
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
- // Avoid serializing long latency dependence chains.
- // For acyclic path limited loops, latency was already checked above.
- if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
- !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
- return;
- }
+ // Avoid serializing long latency dependence chains.
+ // For acyclic path limited loops, latency was already checked above.
+ if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+ return;
- // Prefer immediate defs/users of the last scheduled instruction. This is a
- // local pressure avoidance strategy that also makes the machine code
- // readable.
- if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
- TryCand, Cand, NextDefUse))
- return;
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // local pressure avoidance strategy that also makes the machine code
+ // readable.
+ if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU),
+ TryCand, Cand, NextDefUse))
+ return;
- // Fall through to original instruction order.
- if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
- || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
- TryCand.Reason = NodeOrder;
+ // Fall through to original instruction order.
+ if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
}
}
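tryCandidate applies its heuristics in a fixed order; each either decides the comparison outright or falls through to the next, so the cheaper and more important criteria short-circuit the rest, and with a null Zone the boundary-local ones are now skipped entirely. A compressed sketch of that short-circuit shape, with simplified types and only two criteria:

#include <cstdio>

enum Reason { NoCand, RegExcess, Stall, NodeOrder };

struct Cand { int Excess, Stalls, Num; Reason Why = NoCand; };

// True when the comparison is decided; Try.Why != NoCand iff Try won.
static bool tryLess(int TryVal, int CandVal, Cand &Try, Reason R) {
  if (TryVal < CandVal) { Try.Why = R; return true; }
  if (TryVal > CandVal) { Try.Why = NoCand; return true; }
  return false; // tie: fall through to the next heuristic
}

static void tryCandidate(const Cand &Best, Cand &Try) {
  if (tryLess(Try.Excess, Best.Excess, Try, RegExcess)) return;
  if (tryLess(Try.Stalls, Best.Stalls, Try, Stall)) return;
  if (Try.Num < Best.Num) Try.Why = NodeOrder; // fall back to node order
}

int main() {
  Cand Best{1, 2, 4}, Try{1, 1, 9};
  tryCandidate(Best, Try);
  std::printf("decided by reason %d\n", Try.Why); // prints 2 (Stall)
}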
@@ -2789,20 +2909,20 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand) {
- ReadyQueue &Q = Zone.Available;
-
- DEBUG(Q.dump());
-
// getMaxPressureDelta temporarily modifies the tracker.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+ ReadyQueue &Q = Zone.Available;
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
- SchedCandidate TryCand(Cand.Policy);
- TryCand.SU = *I;
- tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ tryCandidate(Cand, TryCand, ZoneArg);
if (TryCand.Reason != NoCand) {
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
@@ -2819,57 +2939,77 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
IsTopNode = false;
- DEBUG(dbgs() << "Pick Bot ONLY1\n");
+ tracePick(Only1, false);
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
IsTopNode = true;
- DEBUG(dbgs() << "Pick Top ONLY1\n");
+ tracePick(Only1, true);
return SU;
}
- CandPolicy NoPolicy;
- SchedCandidate BotCand(NoPolicy);
- SchedCandidate TopCand(NoPolicy);
// Set the bottom-up policy based on the state of the current bottom zone and
// the instructions outside the zone, including the top zone.
- setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
+ CandPolicy BotPolicy;
+ setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
// Set the top-down policy based on the state of the current top zone and
// the instructions outside the zone, including the bottom zone.
- setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
-
- // Prefer bottom scheduling when heuristics are silent.
- pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
- assert(BotCand.Reason != NoCand && "failed to find the first candidate");
-
- // If either Q has a single candidate that provides the least increase in
- // Excess pressure, we can immediately schedule from that Q.
- //
- // RegionCriticalPSets summarizes the pressure within the scheduled region and
- // affects picking from either Q. If scheduling in one direction must
- // increase pressure for one of the excess PSets, then schedule in that
- // direction first to provide more freedom in the other direction.
- if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
- || (BotCand.Reason == RegCritical
- && !BotCand.isRepeat(RegCritical)))
- {
- IsTopNode = false;
- tracePick(BotCand, IsTopNode);
- return BotCand.SU;
+ CandPolicy TopPolicy;
+ setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+
+ // See if BotCand is still valid (because we previously scheduled from Top).
+ DEBUG(dbgs() << "Picking from Bot:\n");
+ if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+ BotCand.Policy != BotPolicy) {
+ BotCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+ assert(TCand.SU == BotCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
}
+
// Check if the top Q has a better candidate.
- pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
- assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ DEBUG(dbgs() << "Picking from Top:\n");
+ if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+ TopCand.Policy != TopPolicy) {
+ TopCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+ assert(TCand.SU == TopCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
- // Choose the queue with the most important (lowest enum) reason.
- if (TopCand.Reason < BotCand.Reason) {
- IsTopNode = true;
- tracePick(TopCand, IsTopNode);
- return TopCand.SU;
+ // Pick best from BotCand and TopCand.
+ assert(BotCand.isValid());
+ assert(TopCand.isValid());
+ SchedCandidate Cand = BotCand;
+ TopCand.Reason = NoCand;
+ tryCandidate(Cand, TopCand, nullptr);
+ if (TopCand.Reason != NoCand) {
+ Cand.setBest(TopCand);
+ DEBUG(traceCandidate(Cand));
}
- // Otherwise prefer the bottom candidate, in node order if all else failed.
- IsTopNode = false;
- tracePick(BotCand, IsTopNode);
- return BotCand.SU;
+
+ IsTopNode = Cand.AtTop;
+ tracePick(Cand);
+ return Cand.SU;
}
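pickNodeBidirectional now keeps BotCand and TopCand alive across picks and rescans a queue only when the cached winner has gone stale (its node was scheduled) or the zone policy changed; under VerifyScheduling it re-picks and asserts the cache matches. A toy mirror of that revalidation rule, with simplified stand-in types:

#include <cstdio>

struct CachedCand {
  int SU = -1;            // -1 == no cached candidate
  int Policy = -1;
  bool Scheduled = false; // set when the cached node gets scheduled
};

static int repick(CachedCand &C, int Policy) {
  C = {42, Policy, false}; // pretend a full queue scan picked SU(42)
  return C.SU;
}

static int pick(CachedCand &C, int Policy) {
  if (C.SU < 0 || C.Scheduled || C.Policy != Policy)
    return repick(C, Policy); // stale: rescan the ready queue
  return C.SU;                // cache hit: skip the scan entirely
}

int main() {
  CachedCand Bot;
  std::printf("%d (miss)\n", pick(Bot, /*Policy=*/1));
  std::printf("%d (hit)\n", pick(Bot, /*Policy=*/1));
}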
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
@@ -2885,27 +3025,25 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
SU = Top.pickOnlyChoice();
if (!SU) {
CandPolicy NoPolicy;
- SchedCandidate TopCand(NoPolicy);
- pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ TopCand.reset(NoPolicy);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
- tracePick(TopCand, true);
+ tracePick(TopCand);
SU = TopCand.SU;
}
IsTopNode = true;
- }
- else if (RegionPolicy.OnlyBottomUp) {
+ } else if (RegionPolicy.OnlyBottomUp) {
SU = Bot.pickOnlyChoice();
if (!SU) {
CandPolicy NoPolicy;
- SchedCandidate BotCand(NoPolicy);
- pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ BotCand.reset(NoPolicy);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
- tracePick(BotCand, false);
+ tracePick(BotCand);
SU = BotCand.SU;
}
IsTopNode = false;
- }
- else {
+ } else {
SU = pickNodeBidirectional(IsTopNode);
}
} while (SU->isScheduled);
@@ -2957,8 +3095,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
reschedulePhysRegCopies(SU, true);
- }
- else {
+ } else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
@@ -2976,8 +3113,12 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
- if (EnableLoadCluster && DAG->TII->enableClusterLoads())
- DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (EnableMemOpCluster) {
+ if (DAG->TII->enableClusterLoads())
+ DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (DAG->TII->enableClusterStores())
+ DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
+ }
if (EnableMacroFusion)
DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
return DAG;
@@ -3065,12 +3206,10 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
ReadyQueue &Q = Top.Available;
-
- DEBUG(Q.dump());
-
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
SchedCandidate TryCand(Cand.Policy);
TryCand.SU = *I;
+ TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
tryCandidate(Cand, TryCand);
if (TryCand.Reason != NoCand) {
@@ -3089,7 +3228,9 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
SUnit *SU;
do {
SU = Top.pickOnlyChoice();
- if (!SU) {
+ if (SU) {
+ tracePick(Only1, true);
+ } else {
CandPolicy NoPolicy;
SchedCandidate TopCand(NoPolicy);
// Set the top-down policy based on the state of the current top zone and
@@ -3097,7 +3238,7 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
pickNodeFromQueue(TopCand);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
- tracePick(TopCand, true);
+ tracePick(TopCand);
SU = TopCand.SU;
}
} while (SU->isScheduled);
@@ -3285,8 +3426,7 @@ public:
TopQ.pop();
} while (SU->isScheduled);
IsTopNode = true;
- }
- else {
+ } else {
do {
if (BottomQ.empty()) return nullptr;
SU = BottomQ.top();