diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 69 |
1 files changed, 66 insertions, 3 deletions
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 477c1ff..9f2f012 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -27,12 +27,21 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); +// This allows latency based scheduler to notice high latency instructions +// without a target itinerary. The choice of number here has more to do with +// balancing scheduler heuristics than with the actual machine latency. +static cl::opt<int> HighLatencyCycles( + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency'" + "instructions take for targets with no itinerary")); + ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf), InstrItins(mf.getTarget().getInstrItineraryData()) {} @@ -72,11 +81,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isVRegCycle = Old->isVRegCycle; SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; SU->SchedulingPref = Old->SchedulingPref; Old->isCloned = true; return SU; @@ -273,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); + SmallVector<SUnit*, 8> CallSUnits; while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); @@ 
-325,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { if (!HasGlueUse) break; } + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. + if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + // If there are glue operands involved, N is now the bottom-most node // of the sequence of nodes that are glued together. // Update the SUnit. @@ -338,6 +361,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Assign the Latency field of NodeSUnit using target-provided information. ComputeLatency(NodeSUnit); } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } } void ScheduleDAGSDNodes::AddSchedEdges() { @@ -403,6 +440,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { @@ -410,11 +451,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } - if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { + if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. 
For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as // a single use, so to keep pressure balanced we reduce the defs. + // + // We can't tell (without more book-keeping) if this results from + // glued nodes or duplicate operands. As long as we don't reduce + // NumRegDefsLeft to zero, we handle the common cases well. --OpSU->NumRegDefsLeft; } } @@ -437,6 +482,10 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Initialize NumNodeDefs for the current Node's opcode. void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + // Check for phys reg copy. + if (!Node) + return; + if (!Node->isMachineOpcode()) { if (Node->getOpcode() == ISD::CopyFromReg) NodeNumDefs = 1; @@ -499,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { SU->Latency = 1; @@ -506,7 +565,11 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } if (!InstrItins || InstrItins->isEmpty()) { - SU->Latency = 1; + if (N && N->isMachineOpcode() && + TII->isHighLatencyDef(N->getMachineOpcode())) + SU->Latency = HighLatencyCycles; + else + SU->Latency = 1; return; } @@ -573,7 +636,7 @@ namespace { }; } -/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, |