path: root/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
author     dim <dim@FreeBSD.org>  2016-12-26 20:36:37 +0000
committer  dim <dim@FreeBSD.org>  2016-12-26 20:36:37 +0000
commit     06210ae42d418d50d8d9365d5c9419308ae9e7ee (patch)
tree       ab60b4cdd6e430dda1f292a46a77ddb744723f31 /contrib/llvm/lib/CodeGen/MachineScheduler.cpp
parent     2dd166267f53df1c3748b4325d294b9b839de74b (diff)
MFC r309124:

Upgrade our copies of clang, llvm, lldb, compiler-rt and libc++ to 3.9.0
release, and add lld 3.9.0.  Also completely revamp the build system for
clang, llvm, lldb and their related tools.

Please note that from 3.5.0 onwards, clang, llvm and lldb require C++11
support to build; see UPDATING for more information.

Release notes for llvm, clang and lld are available here:
<http://llvm.org/releases/3.9.0/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.9.0/tools/clang/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.9.0/tools/lld/docs/ReleaseNotes.html>

Thanks to Ed Maste, Bryan Drewery, Andrew Turner, Antoine Brodin and
Jan Beich for their help.

Relnotes:       yes

MFC r309147:

Pull in r282174 from upstream llvm trunk (by Krzysztof Parzyszek):

  [PPC] Set SP after loading data from stack frame, if no red zone is present

  Follow-up to r280705: Make sure that the SP is only restored after all
  data is loaded from the stack frame, if there is no red zone.

  This completes the fix for https://llvm.org/bugs/show_bug.cgi?id=26519.

  Differential Revision: https://reviews.llvm.org/D24466

Reported by:    Mark Millard
PR:             214433

MFC r309149:

Pull in r283060 from upstream llvm trunk (by Hal Finkel):

  [PowerPC] Refactor soft-float support, and enable PPC64 soft float

  This change enables soft-float for PowerPC64, and also makes soft-float
  disable all vector instruction sets for both 32-bit and 64-bit modes.
  This latter part is necessary because the PPC backend canonicalizes many
  Altivec vector types to floating-point types, and so soft-float breaks
  scalarization support for many operations. Both for embedded targets and
  for operating-system kernels desiring soft-float support, it seems
  reasonable that disabling hardware floating-point also disables vector
  instructions (embedded targets without hardware floating point support
  are unlikely to have Altivec, etc. and operating system kernels desiring
  not to use floating-point registers to lower syscall cost are unlikely
  to want to use vector registers either). If someone needs this to work,
  we'll need to change the fact that we promote many Altivec operations to
  act on v4f32.

  To make it possible to disable Altivec when soft-float is enabled,
  hardware floating-point support needs to be expressed as a positive
  feature, like the others, and not a negative feature, because target
  features cannot have dependencies on the disabling of some other
  feature. So +soft-float has now become -hard-float.

  Fixes PR26970.

Pull in r283061 from upstream clang trunk (by Hal Finkel):

  [PowerPC] Enable soft-float for PPC64, and +soft-float -> -hard-float

  Enable soft-float support on PPC64, as the backend now supports it.
  Also, the backend now uses -hard-float instead of +soft-float, so set
  the target features accordingly.

  Fixes PR26970.

Reported by:    Mark Millard
PR:             214433

MFC r309212:

Add a few missed clang 3.9.0 files to OptionalObsoleteFiles.

MFC r309262:

Fix packaging for clang, lldb and lld 3.9.0

During the upgrade of clang/llvm etc to 3.9.0 in r309124, the PACKAGE
directive in the usr.bin/clang/*.mk files got dropped accidentally.

Restore it, with a few minor changes and additions:
* Correct license in clang.ucl to NCSA
* Add PACKAGE=clang for clang and most of the "ll" tools
* Put lldb in its own package
* Put lld in its own package

Reviewed by:    gjb, jmallett
Differential Revision: https://reviews.freebsd.org/D8666

MFC r309656:

During the bootstrap phase, when building the minimal llvm library on
PowerPC, add lib/Support/Atomic.cpp.  This is needed because upstream
llvm revision r271821 disabled the use of std::call_once, which causes
some fallback functions from Atomic.cpp to be used instead.

Reported by:    Mark Millard
PR:             214902

MFC r309835:

Tentatively apply https://reviews.llvm.org/D18730 to work around gcc PR
70528 (bogus error: constructor required before non-static data member).
This should fix buildworld with the external gcc package.

Reported by:    https://jenkins.freebsd.org/job/FreeBSD_HEAD_amd64_gcc/

MFC r310194:

Upgrade our copies of clang, llvm, lld, lldb, compiler-rt and libc++ to
3.9.1 release.

Please note that from 3.5.0 onwards, clang, llvm and lldb require C++11
support to build; see UPDATING for more information.

Release notes for llvm, clang and lld will be available here:
<http://releases.llvm.org/3.9.1/docs/ReleaseNotes.html>
<http://releases.llvm.org/3.9.1/tools/clang/docs/ReleaseNotes.html>
<http://releases.llvm.org/3.9.1/tools/lld/docs/ReleaseNotes.html>

Relnotes:       yes
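The MFC r309656 entry above notes that lib/Support/Atomic.cpp must be added to
the minimal bootstrap llvm library because upstream r271821 stopped using
std::call_once, so fallback one-time-initialization paths are used instead. As
a rough sketch of that difference (hypothetical names, not the actual FreeBSD
build or LLVM code), compare a std::call_once initializer with an atomic-flag
fallback of the kind such low-level helpers support:

// Sketch only: hypothetical one-time initialization, contrasting the
// std::call_once path with a hand-rolled fallback built on std::atomic.
#include <atomic>
#include <mutex>

static std::once_flag InitFlag;
static std::atomic<int> InitState{0};   // 0 = not started, 1 = running, 2 = done

static void initializeSubsystem() {
  // expensive one-time setup would go here
}

// Preferred: std::call_once handles racing threads and memory ordering.
void initWithCallOnce() {
  std::call_once(InitFlag, initializeSubsystem);
}

// Fallback: the first thread to flip the flag runs the initializer; the rest
// spin until completion is published with release/acquire ordering.
void initWithAtomics() {
  int Expected = 0;
  if (InitState.compare_exchange_strong(Expected, 1,
                                        std::memory_order_acq_rel)) {
    initializeSubsystem();
    InitState.store(2, std::memory_order_release);
  } else {
    while (InitState.load(std::memory_order_acquire) != 2) {
      // busy-wait; a real implementation would yield or sleep here
    }
  }
}

When the C++11 <mutex> facilities are usable, the call_once form is the simpler
choice; the atomic fallback only matters for configurations that deliberately
avoid that path, which is why Atomic.cpp becomes part of the bootstrap build.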
Diffstat (limited to 'contrib/llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 736
1 file changed, 438 insertions(+), 298 deletions(-)
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index bcee15c..d921e29 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -23,13 +23,13 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include <queue>
using namespace llvm;
@@ -65,14 +65,20 @@ static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
+/// Avoid quadratic complexity in unusually large basic blocks by limiting the
+/// size of the ready lists.
+static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
+ cl::desc("Limit ready list to N instructions"), cl::init(256));
+
static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
cl::desc("Enable register pressure scheduling."), cl::init(true));
static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
cl::desc("Enable cyclic critical path analysis."), cl::init(true));
-static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
- cl::desc("Enable load clustering."), cl::init(true));
+static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable memop clustering."),
+ cl::init(true));
// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
@@ -219,6 +225,11 @@ static cl::opt<bool> EnableMachineSched(
cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
cl::Hidden);
+static cl::opt<bool> EnablePostRAMachineSched(
+ "enable-post-misched",
+ cl::desc("Enable the post-ra machine instruction scheduling pass."),
+ cl::init(true), cl::Hidden);
+
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
@@ -314,6 +325,9 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
+
if (EnableMachineSched.getNumOccurrences()) {
if (!EnableMachineSched)
return false;
@@ -349,10 +363,13 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
- if (skipOptnoneFunction(*mf.getFunction()))
+ if (skipFunction(*mf.getFunction()))
return false;
- if (!mf.getSubtarget().enablePostRAScheduler()) {
+ if (EnablePostRAMachineSched.getNumOccurrences()) {
+ if (!EnablePostRAMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enablePostRAScheduler()) {
DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
@@ -389,7 +406,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB,
MachineFunction *MF,
const TargetInstrInfo *TII) {
- return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
+ return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}
/// Main driver for both MachineScheduler and PostMachineScheduler.
@@ -427,7 +444,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
//
// MBB::size() uses instr_iterator to count. Here we need a bundle to count
// as a single instruction.
- unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
@@ -435,15 +451,13 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
if (RegionEnd != MBB->end() ||
isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
--RegionEnd;
- // Count the boundary instruction.
- --RemainingInstrs;
}
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingInstrs) {
+ for (;I != MBB->begin(); --I) {
if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
break;
if (!I->isDebugValue())
@@ -466,8 +480,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
<< "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
- dbgs() << " RegionInstrs: " << NumRegionInstrs
- << " Remaining: " << RemainingInstrs << "\n");
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
if (DumpCriticalPathLength) {
errs() << MF->getName();
errs() << ":BB# " << MBB->getNumber();
@@ -485,7 +498,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
// scheduler for the top of it's scheduled region.
RegionEnd = Scheduler.begin();
}
- assert(RemainingInstrs == 0 && "Instruction count mismatch!");
Scheduler.finishBlock();
// FIXME: Ideally, no further passes should rely on kill flags. However,
// thumb2 size reduction is currently an exception, so the PostMIScheduler
@@ -640,7 +652,7 @@ void ScheduleDAGMI::moveInstruction(
// Update LiveIntervals
if (LIS)
- LIS->handleMove(MI, /*UpdateFlags=*/true);
+ LIS->handleMove(*MI, /*UpdateFlags=*/true);
// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
@@ -704,8 +716,7 @@ void ScheduleDAGMI::schedule() {
CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
else
moveInstruction(MI, CurrentTop);
- }
- else {
+ } else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
MachineBasicBlock::iterator priorII =
priorNonDebug(CurrentBottom, CurrentTop);
@@ -869,13 +880,19 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
SUPressureDiffs.clear();
ShouldTrackPressure = SchedImpl->shouldTrackPressure();
+ ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
+
+ assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
+ "ShouldTrackLaneMasks requires ShouldTrackPressure");
}
// Setup the register pressure trackers for the top scheduled top and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
- TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
- BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
+ ShouldTrackLaneMasks, false);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+ ShouldTrackLaneMasks, false);
// Close the RPTracker to finalize live ins.
RPTracker.closeRegion();
@@ -905,7 +922,7 @@ void ScheduleDAGMILive::initRegPressure() {
// Account for liveness generated by the region boundary.
if (LiveRegionEnd != RegionEnd) {
- SmallVector<unsigned, 8> LiveUses;
+ SmallVector<RegisterMaskPair, 8> LiveUses;
BotRPTracker.recede(&LiveUses);
updatePressureDiffs(LiveUses);
}
@@ -969,47 +986,74 @@ updateScheduledPressure(const SUnit *SU,
/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
-void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
- for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
+void ScheduleDAGMILive::updatePressureDiffs(
+ ArrayRef<RegisterMaskPair> LiveUses) {
+ for (const RegisterMaskPair &P : LiveUses) {
+ unsigned Reg = P.RegUnit;
/// FIXME: Currently assuming single-use physregs.
- unsigned Reg = LiveUses[LUIdx];
- DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
if (!TRI->isVirtualRegister(Reg))
continue;
- // This may be called before CurrentBottom has been initialized. However,
- // BotRPTracker must have a valid position. We want the value live into the
- // instruction or live out of the block, so ask for the previous
- // instruction's live-out.
- const LiveInterval &LI = LIS->getInterval(Reg);
- VNInfo *VNI;
- MachineBasicBlock::const_iterator I =
- nextIfDebug(BotRPTracker.getPos(), BB->end());
- if (I == BB->end())
- VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
- else {
- LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
- VNI = LRQ.valueIn();
- }
- // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
- assert(VNI && "No live value at use.");
- for (const VReg2SUnit &V2SU
- : make_range(VRegUses.find(Reg), VRegUses.end())) {
- SUnit *SU = V2SU.SU;
- // If this use comes before the reaching def, it cannot be a last use, so
- // descrease its pressure change.
- if (!SU->isScheduled && SU != &ExitSU) {
- LiveQueryResult LRQ
- = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
- if (LRQ.valueIn() == VNI) {
- PressureDiff &PDiff = getPressureDiff(SU);
- PDiff.addPressureChange(Reg, true, &MRI);
- DEBUG(
- dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
- << *SU->getInstr();
- dbgs() << " to ";
- PDiff.dump(*TRI);
- );
+ if (ShouldTrackLaneMasks) {
+ // If the register has just become live then other uses won't change
+ // this fact anymore => decrement pressure.
+ // If the register has just become dead then other uses make it come
+ // back to life => increment pressure.
+ bool Decrement = P.LaneMask != 0;
+
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit &SU = *V2SU.SU;
+ if (SU.isScheduled || &SU == &ExitSU)
+ continue;
+
+ PressureDiff &PDiff = getPressureDiff(&SU);
+ PDiff.addPressureChange(Reg, Decrement, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
+ << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
+ << ' ' << *SU.getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
+ } else {
+ assert(P.LaneMask != 0);
+ DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
+ // This may be called before CurrentBottom has been initialized. However,
+ // BotRPTracker must have a valid position. We want the value live into the
+ // instruction or live out of the block, so ask for the previous
+ // instruction's live-out.
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI;
+ MachineBasicBlock::const_iterator I =
+ nextIfDebug(BotRPTracker.getPos(), BB->end());
+ if (I == BB->end())
+ VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ else {
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
+ VNI = LRQ.valueIn();
+ }
+ // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
+ assert(VNI && "No live value at use.");
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ // If this use comes before the reaching def, it cannot be a last use,
+ // so decrease its pressure change.
+ if (!SU->isScheduled && SU != &ExitSU) {
+ LiveQueryResult LRQ =
+ LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
+ if (LRQ.valueIn() == VNI) {
+ PressureDiff &PDiff = getPressureDiff(SU);
+ PDiff.addPressureChange(Reg, true, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
}
}
}
@@ -1057,11 +1101,6 @@ void ScheduleDAGMILive::schedule() {
// Initialize ready queues now that the DAG and priority data are finalized.
initQueues(TopRoots, BotRoots);
- if (ShouldTrackPressure) {
- assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
- TopRPTracker.setPos(CurrentTop);
- }
-
bool IsTopNode = false;
while (true) {
DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
@@ -1111,14 +1150,14 @@ void ScheduleDAGMILive::buildDAGWithRegPressure() {
// Initialize the register pressure tracker used by buildSchedGraph.
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
- /*TrackUntiedDefs=*/true);
+ ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
// Account for liveness generate by the region boundary.
if (LiveRegionEnd != RegionEnd)
RPTracker.recede();
// Build the DAG, and compute current register pressure.
- buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
+ buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
// Initialize top/bottom trackers after computing region pressure.
initRegPressure();
@@ -1167,10 +1206,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
unsigned MaxCyclicLatency = 0;
// Visit each live out vreg def to find def/use pairs that cross iterations.
- ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
- for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
- RI != RE; ++RI) {
- unsigned Reg = *RI;
+ for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
+ unsigned Reg = P.RegUnit;
if (!TRI->isVirtualRegister(Reg))
continue;
const LiveInterval &LI = LIS->getInterval(Reg);
@@ -1193,8 +1230,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
continue;
// Only consider uses of the phi.
- LiveQueryResult LRQ =
- LI.Query(LIS->getInstructionIndex(SU->getInstr()));
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
if (!LRQ.valueIn()->isPHIDef())
continue;
@@ -1209,8 +1245,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
if (LiveInHeight > LiveOutHeight) {
if (LiveInHeight - LiveOutHeight < CyclicLatency)
CyclicLatency = LiveInHeight - LiveOutHeight;
- }
- else
+ } else
CyclicLatency = 0;
DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
@@ -1223,6 +1258,17 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
return MaxCyclicLatency;
}
+/// Release ExitSU predecessors and setup scheduler queues. Re-position
+/// the Top RP tracker in case the region beginning has changed.
+void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ ScheduleDAGMI::initQueues(TopRoots, BotRoots);
+ if (ShouldTrackPressure) {
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ TopRPTracker.setPos(CurrentTop);
+ }
+}
+
/// Move an instruction and update register pressure.
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
// Move the instruction to its new location in the instruction stream.
@@ -1239,7 +1285,18 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
if (ShouldTrackPressure) {
// Update top scheduled pressure.
- TopRPTracker.advance();
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ TopRPTracker.advance(RegOpers);
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
DEBUG(
dbgs() << "Top Pressure:\n";
@@ -1248,8 +1305,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
}
- }
- else {
+ } else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
MachineBasicBlock::iterator priorII =
priorNonDebug(CurrentBottom, CurrentTop);
@@ -1264,9 +1320,20 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
CurrentBottom = MI;
}
if (ShouldTrackPressure) {
- // Update bottom scheduled pressure.
- SmallVector<unsigned, 8> LiveUses;
- BotRPTracker.recede(&LiveUses);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ BotRPTracker.recedeSkipDebugValues();
+ SmallVector<RegisterMaskPair, 8> LiveUses;
+ BotRPTracker.recede(RegOpers, &LiveUses);
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
DEBUG(
dbgs() << "Bottom Pressure:\n";
@@ -1280,64 +1347,81 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
}
//===----------------------------------------------------------------------===//
-// LoadClusterMutation - DAG post-processing to cluster loads.
+// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
//===----------------------------------------------------------------------===//
namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
-/// loads.
-class LoadClusterMutation : public ScheduleDAGMutation {
- struct LoadInfo {
+/// loads or between neighboring stores.
+class BaseMemOpClusterMutation : public ScheduleDAGMutation {
+ struct MemOpInfo {
SUnit *SU;
unsigned BaseReg;
- unsigned Offset;
- LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
- : SU(su), BaseReg(reg), Offset(ofs) {}
+ int64_t Offset;
+ MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
- bool operator<(const LoadInfo &RHS) const {
+ bool operator<(const MemOpInfo&RHS) const {
return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
}
};
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ bool IsLoad;
+
public:
- LoadClusterMutation(const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri)
- : TII(tii), TRI(tri) {}
+ BaseMemOpClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, bool IsLoad)
+ : TII(tii), TRI(tri), IsLoad(IsLoad) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
- void apply(ScheduleDAGMI *DAG) override;
protected:
- void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+ void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+};
+
+class StoreClusterMutation : public BaseMemOpClusterMutation {
+public:
+ StoreClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, false) {}
+};
+
+class LoadClusterMutation : public BaseMemOpClusterMutation {
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, true) {}
};
} // anonymous
-void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
- ScheduleDAGMI *DAG) {
- SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
- for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
- SUnit *SU = Loads[Idx];
+void BaseMemOpClusterMutation::clusterNeighboringMemOps(
+ ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+ SmallVector<MemOpInfo, 32> MemOpRecords;
+ for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
+ SUnit *SU = MemOps[Idx];
unsigned BaseReg;
- unsigned Offset;
- if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
- LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ int64_t Offset;
+ if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
}
- if (LoadRecords.size() < 2)
+ if (MemOpRecords.size() < 2)
return;
- std::sort(LoadRecords.begin(), LoadRecords.end());
+
+ std::sort(MemOpRecords.begin(), MemOpRecords.end());
unsigned ClusterLength = 1;
- for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
- if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
+ if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
ClusterLength = 1;
continue;
}
- SUnit *SUa = LoadRecords[Idx].SU;
- SUnit *SUb = LoadRecords[Idx+1].SU;
- if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
- && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
-
- DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ SUnit *SUa = MemOpRecords[Idx].SU;
+ SUnit *SUb = MemOpRecords[Idx+1].SU;
+ if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
+ ClusterLength) &&
+ DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+ DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
<< SUb->NodeNum << ")\n");
// Copy successor edges from SUa to SUb. Interleaving computation
// dependent on SUa can prevent load combining due to register reuse.
@@ -1351,22 +1435,26 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
}
++ClusterLength;
- }
- else
+ } else
ClusterLength = 1;
}
}
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
-void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
// Map DAG NodeNum to store chain ID.
DenseMap<unsigned, unsigned> StoreChainIDs;
- // Map each store chain to a set of dependent loads.
+ // Map each store chain to a set of dependent MemOps.
SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
SUnit *SU = &DAG->SUnits[Idx];
- if (!SU->getInstr()->mayLoad())
+ if ((IsLoad && !SU->getInstr()->mayLoad()) ||
+ (!IsLoad && !SU->getInstr()->mayStore()))
continue;
+
unsigned ChainPredID = DAG->SUnits.size();
for (SUnit::const_pred_iterator
PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
@@ -1376,7 +1464,7 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
}
}
// Check if this chain-like pred has been seen
- // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+ // before. ChainPredID==MaxNodeID at the top of the schedule.
unsigned NumChains = StoreChainDependents.size();
std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
@@ -1384,9 +1472,10 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
StoreChainDependents.resize(NumChains + 1);
StoreChainDependents[Result.first->second].push_back(SU);
}
+
// Iterate over the store chains.
for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
- clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+ clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
}
//===----------------------------------------------------------------------===//
@@ -1403,7 +1492,7 @@ public:
MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
: TII(TII), TRI(TRI) {}
- void apply(ScheduleDAGMI *DAG) override;
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
} // anonymous
@@ -1423,7 +1512,9 @@ static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
-void MacroFusion::apply(ScheduleDAGMI *DAG) {
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
// For now, assume targets can only fuse with the branch.
SUnit &ExitSU = DAG->ExitSU;
MachineInstr *Branch = ExitSU.getInstr();
@@ -1439,7 +1530,7 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
if (!HasDataDep(TRI, *Branch, *Pred))
continue;
- if (!TII.shouldScheduleAdjacent(Pred, Branch))
+ if (!TII.shouldScheduleAdjacent(*Pred, *Branch))
continue;
// Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1474,7 +1565,7 @@ class CopyConstrain : public ScheduleDAGMutation {
public:
CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
- void apply(ScheduleDAGMI *DAG) override;
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
protected:
void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
@@ -1505,12 +1596,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
MachineInstr *Copy = CopySU->getInstr();
// Check for pure vreg copies.
- unsigned SrcReg = Copy->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ const MachineOperand &SrcOp = Copy->getOperand(1);
+ unsigned SrcReg = SrcOp.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
return;
- unsigned DstReg = Copy->getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ const MachineOperand &DstOp = Copy->getOperand(0);
+ unsigned DstReg = DstOp.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
return;
// Check if either the dest or source is local. If it's live across a back
@@ -1627,15 +1720,16 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
/// \brief Callback from DAG postProcessing to create weak edges to encourage
/// copy elimination.
-void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
if (FirstPos == DAG->end())
return;
- RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+ RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
RegionEndIdx = DAG->getLIS()->getInstructionIndex(
- &*priorNonDebug(DAG->end(), DAG->begin()));
+ *priorNonDebug(DAG->end(), DAG->begin()));
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
SUnit *SU = &DAG->SUnits[Idx];
@@ -1862,7 +1956,8 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
// Check for interlocks first. For the purpose of other heuristics, an
// instruction that cannot issue appears as if it's not in the ReadyQueue.
bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
- if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
+ if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
+ Available.size() >= ReadyListLimit)
Pending.push(SU);
else
Available.push(SU);
@@ -1905,8 +2000,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
if (!HazardRec->isEnabled()) {
// Bypass HazardRec virtual calls.
CurrCycle = NextCycle;
- }
- else {
+ } else {
// Bypass getHazardType calls in case of long latency.
for (; CurrCycle != NextCycle; ++CurrCycle) {
if (isTop())
@@ -2074,8 +2168,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// If we stall for any reason, bump the cycle.
if (NextCycle > CurrCycle) {
bumpCycle(NextCycle);
- }
- else {
+ } else {
// After updating ZoneCritResIdx and ExpectedLatency, check if we're
// resource limited. If a stall occurred, bumpCycle does this.
unsigned LFactor = SchedModel->getLatencyFactor();
@@ -2119,11 +2212,13 @@ void SchedBoundary::releasePending() {
if (checkHazard(SU))
continue;
+ if (Available.size() >= ReadyListLimit)
+ break;
+
Available.push(SU);
Pending.remove(Pending.begin()+i);
--i; --e;
}
- DEBUG(if (!Pending.empty()) Pending.dump());
CheckPending = false;
}
@@ -2163,6 +2258,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
bumpCycle(CurrCycle + 1);
releasePending();
}
+
+ DEBUG(Pending.dump());
+ DEBUG(Available.dump());
+
if (Available.size() == 1)
return *Available.begin();
return nullptr;
@@ -2177,8 +2276,7 @@ void SchedBoundary::dumpScheduledState() {
if (ZoneCritResIdx) {
ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
ResCount = getResourceCount(ZoneCritResIdx);
- }
- else {
+ } else {
ResFactor = SchedModel->getMicroOpFactor();
ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
}
@@ -2218,8 +2316,7 @@ initResourceDelta(const ScheduleDAGMI *DAG,
/// Set the CandPolicy given a scheduling zone given the current resources and
/// latencies inside and outside the zone.
-void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
- bool IsPostRA,
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
SchedBoundary &CurrZone,
SchedBoundary *OtherZone) {
// Apply preemptive heuristics based on the total latency and resources
@@ -2295,7 +2392,8 @@ const char *GenericSchedulerBase::getReasonStr(
GenericSchedulerBase::CandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND ";
- case PhysRegCopy: return "PREG-COPY";
+ case Only1: return "ONLY1 ";
+ case PhysRegCopy: return "PREG-COPY ";
case RegExcess: return "REG-EXCESS";
case RegCritical: return "REG-CRIT ";
case Stall: return "STALL ";
@@ -2381,7 +2479,6 @@ static bool tryLess(int TryVal, int CandVal,
Cand.Reason = Reason;
return true;
}
- Cand.setRepeat(Reason);
return false;
}
@@ -2398,7 +2495,6 @@ static bool tryGreater(int TryVal, int CandVal,
Cand.Reason = Reason;
return true;
}
- Cand.setRepeat(Reason);
return false;
}
@@ -2414,8 +2510,7 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, GenericSchedulerBase::TopPathReduce))
return true;
- }
- else {
+ } else {
if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
@@ -2428,10 +2523,13 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
return false;
}
-static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
- bool IsTop) {
+static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
- << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
+ << GenericSchedulerBase::getReasonStr(Reason) << '\n');
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
+ tracePick(Cand.Reason, Cand.AtTop);
}
void GenericScheduler::initialize(ScheduleDAGMI *dag) {
@@ -2460,6 +2558,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
Itin, DAG);
}
+ TopCand.SU = nullptr;
+ BotCand.SU = nullptr;
}
/// Initialize the per-region scheduling policy.
@@ -2487,8 +2587,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
RegionPolicy.OnlyBottomUp = true;
// Allow the subtarget to override default policy.
- MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
- NumRegionInstrs);
+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
// After subtarget overrides, apply command line options.
if (!EnableRegPressure)
@@ -2582,19 +2681,25 @@ static bool tryPressure(const PressureChange &TryP,
GenericSchedulerBase::CandReason Reason,
const TargetRegisterInfo *TRI,
const MachineFunction &MF) {
- unsigned TryPSet = TryP.getPSetOrMax();
- unsigned CandPSet = CandP.getPSetOrMax();
- // If both candidates affect the same set, go with the smallest increase.
- if (TryPSet == CandPSet) {
- return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
- Reason);
- }
// If one candidate decreases and the other increases, go with it.
// Invalid candidates have UnitInc==0.
if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
Reason)) {
return true;
}
+ // Do not compare the magnitude of pressure changes between top and bottom
+ // boundary.
+ if (Cand.AtTop != TryCand.AtTop)
+ return false;
+
+ // If both candidates affect the same set in the same boundary, go with the
+ // smallest increase.
+ unsigned TryPSet = TryP.getPSetOrMax();
+ unsigned CandPSet = CandP.getPSetOrMax();
+ if (TryPSet == CandPSet) {
+ return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
+ Reason);
+ }
int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
std::numeric_limits<int>::max();
@@ -2640,64 +2745,64 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
return 0;
}
-/// Apply a set of heursitics to a new candidate. Heuristics are currently
-/// hierarchical. This may be more efficient than a graduated cost model because
-/// we don't need to evaluate all aspects of the model for each node in the
-/// queue. But it's really done to make the heuristics easier to debug and
-/// statistically analyze.
-///
-/// \param Cand provides the policy and current best candidate.
-/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
-/// \param Zone describes the scheduled zone that we are extending.
-/// \param RPTracker describes reg pressure within the scheduled zone.
-/// \param TempTracker is a scratch pressure tracker to reuse in queries.
-void GenericScheduler::tryCandidate(SchedCandidate &Cand,
- SchedCandidate &TryCand,
- SchedBoundary &Zone,
- const RegPressureTracker &RPTracker,
- RegPressureTracker &TempTracker) {
-
+void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+ Cand.SU = SU;
+ Cand.AtTop = AtTop;
if (DAG->isTrackingPressure()) {
- // Always initialize TryCand's RPDelta.
- if (Zone.isTop()) {
+ if (AtTop) {
TempTracker.getMaxDownwardPressureDelta(
- TryCand.SU->getInstr(),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
- }
- else {
+ } else {
if (VerifyScheduling) {
TempTracker.getMaxUpwardPressureDelta(
- TryCand.SU->getInstr(),
- &DAG->getPressureDiff(TryCand.SU),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ &DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
- }
- else {
+ } else {
RPTracker.getUpwardPressureDelta(
- TryCand.SU->getInstr(),
- DAG->getPressureDiff(TryCand.SU),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
}
}
}
- DEBUG(if (TryCand.RPDelta.Excess.isValid())
- dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") "
- << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
- << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
+ DEBUG(if (Cand.RPDelta.Excess.isValid())
+ dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
+ << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
+}
+/// Apply a set of heursitics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+// if Cand is from a different zone than TryCand.
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
return;
}
- if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
- biasPhysRegCopy(Cand.SU, Zone.isTop()),
+ if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
+ biasPhysRegCopy(Cand.SU, Cand.AtTop),
TryCand, Cand, PhysRegCopy))
return;
@@ -2715,17 +2820,26 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
DAG->MF))
return;
- // For loops that are acyclic path limited, aggressively schedule for latency.
- // This can result in very long dependence chains scheduled in sequence, so
- // once every cycle (when CurrMOps == 0), switch to normal heuristics.
- if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
- && tryLatency(TryCand, Cand, Zone))
- return;
+ // We only compare a subset of features when comparing nodes between
+ // Top and Bottom boundary. Some properties are simply incomparable, in many
+ // other instances we should only override the other boundary if something
+ // is a clear good pick on one boundary. Skip heuristics that are more
+ // "tie-breaking" in nature.
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ // For loops that are acyclic path limited, aggressively schedule for
+ // latency. This can result in very long dependence chains scheduled in
+ // sequence, so once every cycle (when CurrMOps == 0), switch to normal
+ // heuristics.
+ if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+ tryLatency(TryCand, Cand, *Zone))
+ return;
- // Prioritize instructions that read unbuffered resources by stall cycles.
- if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
- Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
- return;
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+ Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
+ }
// Keep clustered nodes together to encourage downstream peephole
// optimizations which may reduce resource requirements.
@@ -2733,18 +2847,23 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const SUnit *NextClusterSU =
- Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
- if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ const SUnit *CandNextClusterSU =
+ Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ const SUnit *TryCandNextClusterSU =
+ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+ Cand.SU == CandNextClusterSU,
TryCand, Cand, Cluster))
return;
- // Weak edges are for clustering and other constraints.
- if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
- getWeakLeft(Cand.SU, Zone.isTop()),
- TryCand, Cand, Weak)) {
- return;
+ if (SameBoundary) {
+ // Weak edges are for clustering and other constraints.
+ if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+ getWeakLeft(Cand.SU, Cand.AtTop),
+ TryCand, Cand, Weak))
+ return;
}
+
// Avoid increasing the max pressure of the entire region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
Cand.RPDelta.CurrentMax,
@@ -2752,34 +2871,35 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
DAG->MF))
return;
- // Avoid critical resource consumption and balance the schedule.
- TryCand.initResourceDelta(DAG, SchedModel);
- if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
- TryCand, Cand, ResourceReduce))
- return;
- if (tryGreater(TryCand.ResDelta.DemandedResources,
- Cand.ResDelta.DemandedResources,
- TryCand, Cand, ResourceDemand))
- return;
+ if (SameBoundary) {
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
- // Avoid serializing long latency dependence chains.
- // For acyclic path limited loops, latency was already checked above.
- if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
- !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
- return;
- }
+ // Avoid serializing long latency dependence chains.
+ // For acyclic path limited loops, latency was already checked above.
+ if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+ return;
- // Prefer immediate defs/users of the last scheduled instruction. This is a
- // local pressure avoidance strategy that also makes the machine code
- // readable.
- if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
- TryCand, Cand, NextDefUse))
- return;
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // local pressure avoidance strategy that also makes the machine code
+ // readable.
+ if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU),
+ TryCand, Cand, NextDefUse))
+ return;
- // Fall through to original instruction order.
- if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
- || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
- TryCand.Reason = NodeOrder;
+ // Fall through to original instruction order.
+ if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
}
}
@@ -2789,20 +2909,20 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand) {
- ReadyQueue &Q = Zone.Available;
-
- DEBUG(Q.dump());
-
// getMaxPressureDelta temporarily modifies the tracker.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+ ReadyQueue &Q = Zone.Available;
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
- SchedCandidate TryCand(Cand.Policy);
- TryCand.SU = *I;
- tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ tryCandidate(Cand, TryCand, ZoneArg);
if (TryCand.Reason != NoCand) {
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
@@ -2819,57 +2939,77 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
IsTopNode = false;
- DEBUG(dbgs() << "Pick Bot ONLY1\n");
+ tracePick(Only1, false);
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
IsTopNode = true;
- DEBUG(dbgs() << "Pick Top ONLY1\n");
+ tracePick(Only1, true);
return SU;
}
- CandPolicy NoPolicy;
- SchedCandidate BotCand(NoPolicy);
- SchedCandidate TopCand(NoPolicy);
// Set the bottom-up policy based on the state of the current bottom zone and
// the instructions outside the zone, including the top zone.
- setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
+ CandPolicy BotPolicy;
+ setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
// Set the top-down policy based on the state of the current top zone and
// the instructions outside the zone, including the bottom zone.
- setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
-
- // Prefer bottom scheduling when heuristics are silent.
- pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
- assert(BotCand.Reason != NoCand && "failed to find the first candidate");
-
- // If either Q has a single candidate that provides the least increase in
- // Excess pressure, we can immediately schedule from that Q.
- //
- // RegionCriticalPSets summarizes the pressure within the scheduled region and
- // affects picking from either Q. If scheduling in one direction must
- // increase pressure for one of the excess PSets, then schedule in that
- // direction first to provide more freedom in the other direction.
- if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
- || (BotCand.Reason == RegCritical
- && !BotCand.isRepeat(RegCritical)))
- {
- IsTopNode = false;
- tracePick(BotCand, IsTopNode);
- return BotCand.SU;
+ CandPolicy TopPolicy;
+ setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+
+ // See if BotCand is still valid (because we previously scheduled from Top).
+ DEBUG(dbgs() << "Picking from Bot:\n");
+ if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+ BotCand.Policy != BotPolicy) {
+ BotCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+ assert(TCand.SU == BotCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
}
+
// Check if the top Q has a better candidate.
- pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
- assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ DEBUG(dbgs() << "Picking from Top:\n");
+ if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+ TopCand.Policy != TopPolicy) {
+ TopCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+ assert(TCand.SU == TopCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
- // Choose the queue with the most important (lowest enum) reason.
- if (TopCand.Reason < BotCand.Reason) {
- IsTopNode = true;
- tracePick(TopCand, IsTopNode);
- return TopCand.SU;
+ // Pick best from BotCand and TopCand.
+ assert(BotCand.isValid());
+ assert(TopCand.isValid());
+ SchedCandidate Cand = BotCand;
+ TopCand.Reason = NoCand;
+ tryCandidate(Cand, TopCand, nullptr);
+ if (TopCand.Reason != NoCand) {
+ Cand.setBest(TopCand);
+ DEBUG(traceCandidate(Cand));
}
- // Otherwise prefer the bottom candidate, in node order if all else failed.
- IsTopNode = false;
- tracePick(BotCand, IsTopNode);
- return BotCand.SU;
+
+ IsTopNode = Cand.AtTop;
+ tracePick(Cand);
+ return Cand.SU;
}
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
@@ -2885,27 +3025,25 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
SU = Top.pickOnlyChoice();
if (!SU) {
CandPolicy NoPolicy;
- SchedCandidate TopCand(NoPolicy);
- pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ TopCand.reset(NoPolicy);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
- tracePick(TopCand, true);
+ tracePick(TopCand);
SU = TopCand.SU;
}
IsTopNode = true;
- }
- else if (RegionPolicy.OnlyBottomUp) {
+ } else if (RegionPolicy.OnlyBottomUp) {
SU = Bot.pickOnlyChoice();
if (!SU) {
CandPolicy NoPolicy;
- SchedCandidate BotCand(NoPolicy);
- pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ BotCand.reset(NoPolicy);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
- tracePick(BotCand, false);
+ tracePick(BotCand);
SU = BotCand.SU;
}
IsTopNode = false;
- }
- else {
+ } else {
SU = pickNodeBidirectional(IsTopNode);
}
} while (SU->isScheduled);
@@ -2957,8 +3095,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
reschedulePhysRegCopies(SU, true);
- }
- else {
+ } else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
@@ -2976,8 +3113,12 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
- if (EnableLoadCluster && DAG->TII->enableClusterLoads())
- DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (EnableMemOpCluster) {
+ if (DAG->TII->enableClusterLoads())
+ DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (DAG->TII->enableClusterStores())
+ DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
+ }
if (EnableMacroFusion)
DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
return DAG;
@@ -3065,12 +3206,10 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
ReadyQueue &Q = Top.Available;
-
- DEBUG(Q.dump());
-
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
SchedCandidate TryCand(Cand.Policy);
TryCand.SU = *I;
+ TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
tryCandidate(Cand, TryCand);
if (TryCand.Reason != NoCand) {
@@ -3089,7 +3228,9 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
SUnit *SU;
do {
SU = Top.pickOnlyChoice();
- if (!SU) {
+ if (SU) {
+ tracePick(Only1, true);
+ } else {
CandPolicy NoPolicy;
SchedCandidate TopCand(NoPolicy);
// Set the top-down policy based on the state of the current top zone and
@@ -3097,7 +3238,7 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
pickNodeFromQueue(TopCand);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
- tracePick(TopCand, true);
+ tracePick(TopCand);
SU = TopCand.SU;
}
} while (SU->isScheduled);
@@ -3285,8 +3426,7 @@ public:
TopQ.pop();
} while (SU->isScheduled);
IsTopNode = true;
- }
- else {
+ } else {
do {
if (BottomQ.empty()) return nullptr;
SU = BottomQ.top();