path: root/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
Diffstat (limited to 'contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp')
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp  993
1 file changed, 502 insertions, 491 deletions
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 11b246a..22bfd4d 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -14,11 +14,11 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -27,6 +27,8 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -36,7 +38,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <queue>
using namespace llvm;
@@ -49,12 +50,51 @@ static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
+// Note: the two options below might be used in tuning compile time vs
+// output quality. Setting HugeRegion so large that it will never be
+// reached means best-effort, but may be slow.
+
+// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
+// together hold this many SUs, a reduction of maps will be done.
+static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
+ cl::init(1000), cl::desc("The limit to use while constructing the DAG "
+ "prior to scheduling, at which point a trade-off "
+ "is made to avoid excessive compile time."));
+
+static cl::opt<unsigned> ReductionSize(
+ "dag-maps-reduction-size", cl::Hidden,
+ cl::desc("A huge scheduling region will have maps reduced by this many "
+ "nodes at a time. Defaults to HugeRegion / 2."));
+
+static unsigned getReductionSize() {
+ // Always reduce a huge region with half of the elements, except
+ // when user sets this number explicitly.
+ if (ReductionSize.getNumOccurrences() == 0)
+ return HugeRegion / 2;
+ return ReductionSize;
+}
+
+static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << "{ ";
+ for (auto *su : L) {
+ dbgs() << "SU(" << su->NodeNum << ")";
+ if (su != L.back())
+ dbgs() << ", ";
+ }
+ dbgs() << "}\n";
+#endif
+}
+
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
bool RemoveKillFlags)
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
- TrackLaneMasks(false), FirstDbgValue(nullptr) {
+ TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
+ UnknownValue(UndefValue::get(
+ Type::getVoidTy(mf.getFunction()->getContext()))),
+ FirstDbgValue(nullptr) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -120,10 +160,6 @@ static void getUnderlyingObjects(const Value *V,
} while (!Working.empty());
}
-typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
-typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
-UnderlyingObjectsVector;
-
/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object.
@@ -131,46 +167,46 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo *MFI,
UnderlyingObjectsVector &Objects,
const DataLayout &DL) {
- if (!MI->hasOneMemOperand() ||
- (!(*MI->memoperands_begin())->getValue() &&
- !(*MI->memoperands_begin())->getPseudoValue()) ||
- (*MI->memoperands_begin())->isVolatile())
- return;
-
- if (const PseudoSourceValue *PSV =
- (*MI->memoperands_begin())->getPseudoValue()) {
- // Function that contain tail calls don't have unique PseudoSourceValue
- // objects. Two PseudoSourceValues might refer to the same or overlapping
- // locations. The client code calling this function assumes this is not the
- // case. So return a conservative answer of no known object.
- if (MFI->hasTailCall())
- return;
+ auto allMMOsOkay = [&]() {
+ for (const MachineMemOperand *MMO : MI->memoperands()) {
+ if (MMO->isVolatile())
+ return false;
+
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
+ // Functions that contain tail calls don't have unique PseudoSourceValue
+ // objects. Two PseudoSourceValues might refer to the same or
+ // overlapping locations. The client code calling this function assumes
+ // this is not the case. So return a conservative answer of no known
+ // object.
+ if (MFI->hasTailCall())
+ return false;
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- if (!PSV->isAliased(MFI)) {
- bool MayAlias = PSV->mayAlias(MFI);
- Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
- }
- return;
- }
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return false;
- const Value *V = (*MI->memoperands_begin())->getValue();
- if (!V)
- return;
+ bool MayAlias = PSV->mayAlias(MFI);
+ Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
+ } else if (const Value *V = MMO->getValue()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs, DL);
- SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs, DL);
+ for (Value *V : Objs) {
+ if (!isIdentifiedObject(V))
+ return false;
- for (Value *V : Objs) {
- if (!isIdentifiedObject(V)) {
- Objects.clear();
- return;
+ Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
+ }
+ } else
+ return false;
}
+ return true;
+ };
- Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
- }
+ if (!allMMOsOkay())
+ Objects.clear();
}
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
@@ -475,10 +511,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// VReg2SUnit for the non-overlapping part.
LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
- if (NonOverlapMask != 0)
- CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU));
V2SU.SU = SU;
V2SU.LaneMask = OverlapMask;
+ if (NonOverlapMask != 0)
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
}
// If there was no CurrentVRegDefs entry for some lanes yet, create one.
if (LaneMask != 0)
@@ -518,84 +554,32 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasOrderedMemoryRef() &&
- (!MI->mayLoad() || !MI->isInvariantLoad(AA)));
-}
-
-// This MI might have either incomplete info, or known to be unsafe
-// to deal with (i.e. volatile object).
-static inline bool isUnsafeMemoryObject(MachineInstr *MI,
- const MachineFrameInfo *MFI,
- const DataLayout &DL) {
- if (!MI || MI->memoperands_empty())
- return true;
- // We purposefully do no check for hasOneMemOperand() here
- // in hope to trigger an assert downstream in order to
- // finish implementation.
- if ((*MI->memoperands_begin())->isVolatile() ||
- MI->hasUnmodeledSideEffects())
- return true;
-
- if ((*MI->memoperands_begin())->getPseudoValue()) {
- // Similarly to getUnderlyingObjectForInstr:
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- return true;
- }
-
- const Value *V = (*MI->memoperands_begin())->getValue();
- if (!V)
- return true;
-
- SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs, DL);
- for (Value *V : Objs) {
- // Does this pointer refer to a distinct and identifiable object?
- if (!isIdentifiedObject(V))
- return true;
- }
-
- return false;
+ (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA));
}
/// This returns true if the two MIs need a chain edge between them.
-/// If these are not even memory operations, we still may need
-/// chain deps between them. The question really is - could
-/// these two MIs be reordered during scheduling from memory dependency
-/// point of view.
+/// This is called on normal stores and loads.
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
const DataLayout &DL, MachineInstr *MIa,
MachineInstr *MIb) {
const MachineFunction *MF = MIa->getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- // Cover a trivial case - no edge is need to itself.
- if (MIa == MIb)
- return false;
-
- // Let the target decide if memory accesses cannot possibly overlap.
- if ((MIa->mayLoad() || MIa->mayStore()) &&
- (MIb->mayLoad() || MIb->mayStore()))
- if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
- return false;
+ assert ((MIa->mayStore() || MIb->mayStore()) &&
+ "Dependency checked between two loads");
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
- return true;
-
- if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
- return true;
-
- // If we are dealing with two "normal" loads, we do not need an edge
- // between them - they could be reordered.
- if (!MIa->mayStore() && !MIb->mayStore())
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*MIa, *MIb, AA))
return false;
// To this point analysis is generic. From here on we do need AA.
if (!AA)
return true;
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+ return true;
+
MachineMemOperand *MMOa = *MIa->memoperands_begin();
MachineMemOperand *MMOb = *MIb->memoperands_begin();
@@ -634,106 +618,15 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
return (AAResult != NoAlias);
}
-/// This recursive function iterates over chain deps of SUb looking for
-/// "latest" node that needs a chain edge to SUa.
-static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- const DataLayout &DL, SUnit *SUa, SUnit *SUb,
- SUnit *ExitSU, unsigned *Depth,
- SmallPtrSetImpl<const SUnit *> &Visited) {
- if (!SUa || !SUb || SUb == ExitSU)
- return *Depth;
-
- // Remember visited nodes.
- if (!Visited.insert(SUb).second)
- return *Depth;
- // If there is _some_ dependency already in place, do not
- // descend any further.
- // TODO: Need to make sure that if that dependency got eliminated or ignored
- // for any reason in the future, we would not violate DAG topology.
- // Currently it does not happen, but makes an implicit assumption about
- // future implementation.
- //
- // Independently, if we encounter node that is some sort of global
- // object (like a call) we already have full set of dependencies to it
- // and we can stop descending.
- if (SUa->isSucc(SUb) ||
- isGlobalMemoryObject(AA, SUb->getInstr()))
- return *Depth;
-
- // If we do need an edge, or we have exceeded depth budget,
- // add that edge to the predecessors chain of SUb,
- // and stop descending.
- if (*Depth > 200 ||
- MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
- SUb->addPred(SDep(SUa, SDep::MayAliasMem));
- return *Depth;
- }
- // Track current depth.
- (*Depth)++;
- // Iterate over memory dependencies only.
- for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
- I != E; ++I)
- if (I->isNormalMemoryOrBarrier())
- iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
- return *Depth;
-}
-
-/// This function assumes that "downward" from SU there exist
-/// tail/leaf of already constructed DAG. It iterates downward and
-/// checks whether SU can be aliasing any node dominated
-/// by it.
-static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
- std::set<SUnit *> &CheckList,
- unsigned LatencyToLoad) {
- if (!SU)
- return;
-
- SmallPtrSet<const SUnit*, 16> Visited;
- unsigned Depth = 0;
-
- for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
- I != IE; ++I) {
- if (SU == *I)
- continue;
- if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
- SDep Dep(SU, SDep::MayAliasMem);
- Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
- (*I)->addPred(Dep);
- }
-
- // Iterate recursively over all previously added memory chain
- // successors. Keep track of visited nodes.
- for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
- JE = (*I)->Succs.end(); J != JE; ++J)
- if (J->isNormalMemoryOrBarrier())
- iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
- Visited);
- }
-}
-
-/// Check whether two objects need a chain edge, if so, add it
-/// otherwise remember the rejected SU.
-static inline void addChainDependency(AliasAnalysis *AA,
- const MachineFrameInfo *MFI,
- const DataLayout &DL, SUnit *SUa,
- SUnit *SUb, std::set<SUnit *> &RejectList,
- unsigned TrueMemOrderLatency = 0,
- bool isNormalMemory = false) {
- // If this is a false dependency,
- // do not add the edge, but remember the rejected node.
- if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
- SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
- Dep.setLatency(TrueMemOrderLatency);
+/// Check whether two objects need a chain edge and add it if needed.
+void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
+ unsigned Latency) {
+ if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
+ SUb->getInstr())) {
+ SDep Dep(SUa, SDep::MayAliasMem);
+ Dep.setLatency(Latency);
SUb->addPred(Dep);
}
- else {
- // Duplicate entries should be ignored.
- RejectList.insert(SUb);
- DEBUG(dbgs() << "\tReject chain dep between SU("
- << SUa->NodeNum << ") and SU("
- << SUb->NodeNum << ")\n");
- }
}
/// Create an SUnit for each real instruction, numbered in top-down topological
@@ -752,16 +645,15 @@ void ScheduleDAGInstrs::initSUnits() {
// which is contained within a basic block.
SUnits.reserve(NumRegionInstrs);
- for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
- MachineInstr *MI = I;
- if (MI->isDebugValue())
+ for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) {
+ if (MI.isDebugValue())
continue;
- SUnit *SU = newSUnit(MI);
- MISUnitMap[MI] = SU;
+ SUnit *SU = newSUnit(&MI);
+ MISUnitMap[&MI] = SU;
- SU->isCall = MI->isCall();
- SU->isCommutable = MI->isCommutable();
+ SU->isCall = MI.isCall();
+ SU->isCommutable = MI.isCommutable();
// Assign the Latency field of SU using target-provided information.
SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
@@ -808,6 +700,19 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
+ // Ignore re-defs.
+ if (TrackLaneMasks) {
+ bool FoundDef = false;
+ for (const MachineOperand &MO2 : MI->operands()) {
+ if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
+ FoundDef = true;
+ break;
+ }
+ }
+ if (FoundDef)
+ continue;
+ }
+
// Record this local VReg use.
VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
for (; UI != VRegUses.end(); ++UI) {
@@ -819,17 +724,136 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
}
}
+class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
+
+ /// Current total number of SUs in map.
+ unsigned NumNodes;
+
+ /// 1 for loads, 0 for stores. (see comment in SUList)
+ unsigned TrueMemOrderLatency;
+public:
+
+ Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
+
+ /// To keep NumNodes up to date, insert() is used instead of
+ /// this operator w/ push_back().
+ ValueType &operator[](const SUList &Key) {
+ llvm_unreachable("Don't use. Use insert() instead."); };
+
+ /// Add SU to the SUList of V. If Map grows huge, reduce its size
+ /// by calling reduce().
+ void inline insert(SUnit *SU, ValueType V) {
+ MapVector::operator[](V).push_back(SU);
+ NumNodes++;
+ }
+
+ /// Clears the list of SUs mapped to V.
+ void inline clearList(ValueType V) {
+ iterator Itr = find(V);
+ if (Itr != end()) {
+ assert (NumNodes >= Itr->second.size());
+ NumNodes -= Itr->second.size();
+
+ Itr->second.clear();
+ }
+ }
+
+ /// Clears map from all contents.
+ void clear() {
+ MapVector<ValueType, SUList>::clear();
+ NumNodes = 0;
+ }
+
+ unsigned inline size() const { return NumNodes; }
+
+ /// Count the number of SUs in this map after a reduction.
+ void reComputeSize(void) {
+ NumNodes = 0;
+ for (auto &I : *this)
+ NumNodes += I.second.size();
+ }
+
+ unsigned inline getTrueMemOrderLatency() const {
+ return TrueMemOrderLatency;
+ }
+
+ void dump();
+};
+
+void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
+ Value2SUsMap &Val2SUsMap) {
+ for (auto &I : Val2SUsMap)
+ addChainDependencies(SU, I.second,
+ Val2SUsMap.getTrueMemOrderLatency());
+}
+
+void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
+ Value2SUsMap &Val2SUsMap,
+ ValueType V) {
+ Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
+ if (Itr != Val2SUsMap.end())
+ addChainDependencies(SU, Itr->second,
+ Val2SUsMap.getTrueMemOrderLatency());
+}
+
+void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
+ assert (BarrierChain != nullptr);
+
+ for (auto &I : map) {
+ SUList &sus = I.second;
+ for (auto *SU : sus)
+ SU->addPredBarrier(BarrierChain);
+ }
+ map.clear();
+}
+
+void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
+ assert (BarrierChain != nullptr);
+
+ // Go through all lists of SUs.
+ for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
+ Value2SUsMap::iterator CurrItr = I++;
+ SUList &sus = CurrItr->second;
+ SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
+ for (; SUItr != SUEE; ++SUItr) {
+ // Stop on BarrierChain or any instruction above it.
+ if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
+ break;
+
+ (*SUItr)->addPredBarrier(BarrierChain);
+ }
+
+ // Remove also the BarrierChain from list if present.
+ if (SUItr != SUEE && *SUItr == BarrierChain)
+ SUItr++;
+
+ // Remove all SUs that are now successors of BarrierChain.
+ if (SUItr != sus.begin())
+ sus.erase(sus.begin(), SUItr);
+ }
+
+ // Remove all entries with empty su lists.
+ map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
+ return (mapEntry.second.empty()); });
+
+ // Recompute the size of the map (NumNodes).
+ map.reComputeSize();
+}
+
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs,
+ LiveIntervals *LIS,
bool TrackLaneMasks) {
const TargetSubtargetInfo &ST = MF.getSubtarget();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
- AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
+ AAForDep = UseAA ? AA : nullptr;
+
+ BarrierChain = nullptr;
this->TrackLaneMasks = TrackLaneMasks;
MISUnitMap.clear();
@@ -841,19 +865,25 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (PDiffs)
PDiffs->init(SUnits.size());
- // We build scheduling units by walking a block's instruction list from bottom
- // to top.
-
- // Remember where a generic side-effecting instruction is as we proceed.
- SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
-
- // Memory references to specific known memory locations are tracked
- // so that they can be given more precise dependencies. We track
- // separately the known memory locations that may alias and those
- // that are known not to alias
- MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
- MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
- std::set<SUnit*> RejectMemNodes;
+ // We build scheduling units by walking a block's instruction list
+ // from bottom to top.
+
+ // Each MI's memory operand(s) is analyzed to a list of underlying
+ // objects. The SU is then inserted in the SUList(s) mapped from the
+ // Value(s). Each Value thus gets mapped to lists of SUs depending
+ // on it, stores and loads kept separately. Two SUs are trivially
+ // non-aliasing if they both depend on only identified Values and do
+ // not share any common Value.
+ Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
+
+ // Certain memory accesses are known to not alias any SU in Stores
+ // or Loads, and have therefore their own 'NonAlias'
+ // domain. E.g. spill / reload instructions never alias LLVM IR
+ // Values. It would be nice to assume that this type of memory
+ // accesses always have a proper memory operand modelling, and are
+ // therefore never unanalyzable, but this is conservatively not
+ // done.
+ Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
@@ -882,283 +912,201 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
MachineInstr *DbgMI = nullptr;
for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
- MachineInstr *MI = std::prev(MII);
- if (MI && DbgMI) {
- DbgValues.push_back(std::make_pair(DbgMI, MI));
+ MachineInstr &MI = *std::prev(MII);
+ if (DbgMI) {
+ DbgValues.push_back(std::make_pair(DbgMI, &MI));
DbgMI = nullptr;
}
- if (MI->isDebugValue()) {
- DbgMI = MI;
+ if (MI.isDebugValue()) {
+ DbgMI = &MI;
continue;
}
- SUnit *SU = MISUnitMap[MI];
+ SUnit *SU = MISUnitMap[&MI];
assert(SU && "No SUnit mapped to this MI");
if (RPTracker) {
collectVRegUses(SU);
RegisterOperands RegOpers;
- RegOpers.collect(*MI, *TRI, MRI);
+ RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
+ }
if (PDiffs != nullptr)
PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
RPTracker->recedeSkipDebugValues();
- assert(&*RPTracker->getPos() == MI && "RPTracker in sync");
+ assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
RPTracker->recede(RegOpers);
}
assert(
- (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
+ (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
"Cannot schedule terminators or labels!");
// Add register-based dependencies (data, anti, and output).
+ // For some instructions (calls, returns, inline-asm, etc.) there can
+ // be explicit uses and implicit defs, in which case the use will appear
+ // on the operand list before the def. Do two passes over the operand
+ // list to make sure that defs are processed before any uses.
bool HasVRegDef = false;
- for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
- const MachineOperand &MO = MI->getOperand(j);
- if (!MO.isReg()) continue;
+ for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI.getOperand(j);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
+ if (Reg == 0)
+ continue;
if (TRI->isPhysicalRegister(Reg))
addPhysRegDeps(SU, j);
else {
- if (MO.isDef()) {
- HasVRegDef = true;
- addVRegDefDeps(SU, j);
- }
- else if (MO.readsReg()) // ignore undef operands
- addVRegUseDeps(SU, j);
+ HasVRegDef = true;
+ addVRegDefDeps(SU, j);
}
}
+ // Now process all uses.
+ for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI.getOperand(j);
+ // Only look at use operands.
+ // We do not need to check for MO.readsReg() here because subsequent
+ // subregister defs will get output dependence edges and need no
+ // additional use dependencies.
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(SU, j);
+ }
+
// If we haven't seen any uses in this scheduling region, create a
// dependence edge to ExitSU to model the live-out latency. This is required
// for vreg defs with no in-region use, and prefetches with no vreg def.
//
// FIXME: NumDataSuccs would be more precise than NumSuccs here. This
// check currently relies on being called before adding chain deps.
- if (SU->NumSuccs == 0 && SU->Latency > 1
- && (HasVRegDef || MI->mayLoad())) {
+ if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
SDep Dep(SU, SDep::Artificial);
Dep.setLatency(SU->Latency - 1);
ExitSU.addPred(Dep);
}
- // Add chain dependencies.
- // Chain dependencies used to enforce memory order should have
- // latency of 0 (except for true dependency of Store followed by
- // aliased Load... we estimate that with a single cycle of latency
- // assuming the hardware will bypass)
- // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
- // after stack slots are lowered to actual addresses.
- // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
- // produce more precise dependence information.
- unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
- if (isGlobalMemoryObject(AA, MI)) {
- // Be conservative with these and add dependencies on all memory
- // references, even those that are known to not alias.
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
- I->second[i]->addPred(SDep(SU, SDep::Barrier));
- }
- }
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
- SDep Dep(SU, SDep::Barrier);
- Dep.setLatency(TrueMemOrderLatency);
- I->second[i]->addPred(Dep);
- }
- }
- // Add SU to the barrier chain.
+ // Add memory dependencies (Note: isStoreToStackSlot and
+ // isLoadFromStackSLot are not usable after stack slots are lowered to
+ // actual addresses).
+
+ // This is a barrier event that acts as a pivotal node in the DAG.
+ if (isGlobalMemoryObject(AA, &MI)) {
+
+ // Become the barrier chain.
if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Barrier));
+ BarrierChain->addPredBarrier(SU);
BarrierChain = SU;
- // This is a barrier event that acts as a pivotal node in the DAG,
- // so it is safe to clear list of exposed nodes.
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
- RejectMemNodes.clear();
- NonAliasMemDefs.clear();
- NonAliasMemUses.clear();
-
- // fall-through
- new_alias_chain:
- // Chain all possibly aliasing memory references through SU.
- if (AliasChain) {
- unsigned ChainLatency = 0;
- if (AliasChain->getInstr()->mayLoad())
- ChainLatency = TrueMemOrderLatency;
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
- RejectMemNodes, ChainLatency);
- }
- AliasChain = SU;
- for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- PendingLoads[k], RejectMemNodes,
- TrueMemOrderLatency);
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes);
- }
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes, TrueMemOrderLatency);
- }
- // This call must come after calls to addChainDependency() since it
- // consumes the 'RejectMemNodes' list that addChainDependency() possibly
- // adds to.
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
- PendingLoads.clear();
- AliasMemDefs.clear();
- AliasMemUses.clear();
- } else if (MI->mayStore()) {
- // Add dependence on barrier chain, if needed.
- // There is no point to check aliasing on barrier event. Even if
- // SU and barrier _could_ be reordered, they should not. In addition,
- // we have lost all RejectMemNodes below barrier.
- if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Barrier));
- UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
+ DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
- if (Objs.empty()) {
- // Treat all other stores conservatively.
- goto new_alias_chain;
- }
+ // Add dependencies against everything below it and clear maps.
+ addBarrierChain(Stores);
+ addBarrierChain(Loads);
+ addBarrierChain(NonAliasStores);
+ addBarrierChain(NonAliasLoads);
- bool MayAlias = false;
- for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
- K != KE; ++K) {
- ValueType V = K->getPointer();
- bool ThisMayAlias = K->getInt();
- if (ThisMayAlias)
- MayAlias = true;
-
- // A store to a specific PseudoSourceValue. Add precise dependencies.
- // Record the def in MemDefs, first adding a dep if there is
- // an existing def.
- MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
- ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
- if (I != IE) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes, 0, true);
-
- // If we're not using AA, then we only need one store per object.
- if (!AAForDep)
- I->second.clear();
- I->second.push_back(SU);
- } else {
- if (ThisMayAlias) {
- if (!AAForDep)
- AliasMemDefs[V].clear();
- AliasMemDefs[V].push_back(SU);
- } else {
- if (!AAForDep)
- NonAliasMemDefs[V].clear();
- NonAliasMemDefs[V].push_back(SU);
- }
+ continue;
+ }
+
+ // If it's not a store or a variant load, we're done.
+ if (!MI.mayStore() && !(MI.mayLoad() && !MI.isInvariantLoad(AA)))
+ continue;
+
+ // Always add dependency edge to BarrierChain if present.
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+
+ // Find the underlying objects for MI. The Objs vector is either
+ // empty, or filled with the Values of memory locations which this
+ // SU depends on. An empty vector means the memory location is
+ // unknown, and may alias anything.
+ UnderlyingObjectsVector Objs;
+ getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout());
+
+ if (MI.mayStore()) {
+ if (Objs.empty()) {
+ // An unknown store depends on all stores and loads.
+ addChainDependencies(SU, Stores);
+ addChainDependencies(SU, NonAliasStores);
+ addChainDependencies(SU, Loads);
+ addChainDependencies(SU, NonAliasLoads);
+
+ // Map this store to 'UnknownValue'.
+ Stores.insert(SU, UnknownValue);
+ } else {
+ // Add precise dependencies against all previously seen memory
+ // accesses mapped to the same Value(s).
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Add dependencies to previous stores and loads mapped to V.
+ addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
+ addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
}
- // Handle the uses in MemUses, if there are any.
- MapVector<ValueType, std::vector<SUnit *> >::iterator J =
- ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
- MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
- ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
- if (J != JE) {
- for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- J->second[i], RejectMemNodes,
- TrueMemOrderLatency, true);
- J->second.clear();
+ // Update the store map after all chains have been added to avoid adding
+ // self-loop edge if multiple underlying objects are present.
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Map this store to V.
+ (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
}
+ // The store may have dependencies to unanalyzable loads and
+ // stores.
+ addChainDependencies(SU, Loads, UnknownValue);
+ addChainDependencies(SU, Stores, UnknownValue);
}
- if (MayAlias) {
- // Add dependencies from all the PendingLoads, i.e. loads
- // with no underlying object.
- for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- PendingLoads[k], RejectMemNodes,
- TrueMemOrderLatency);
- // Add dependence on alias chain, if needed.
- if (AliasChain)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
- RejectMemNodes);
- }
- // This call must come after calls to addChainDependency() since it
- // consumes the 'RejectMemNodes' list that addChainDependency() possibly
- // adds to.
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
- } else if (MI->mayLoad()) {
- bool MayAlias = true;
- if (MI->isInvariantLoad(AA)) {
- // Invariant load, no chain dependencies needed!
+ } else { // SU is a load.
+ if (Objs.empty()) {
+ // An unknown load depends on all stores.
+ addChainDependencies(SU, Stores);
+ addChainDependencies(SU, NonAliasStores);
+
+ Loads.insert(SU, UnknownValue);
} else {
- UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
-
- if (Objs.empty()) {
- // A load with no underlying object. Depend on all
- // potentially aliasing stores.
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes);
-
- PendingLoads.push_back(SU);
- MayAlias = true;
- } else {
- MayAlias = false;
- }
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Add precise dependencies against all previously seen stores
+ // mapping to the same Value(s).
+ addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
- for (UnderlyingObjectsVector::iterator
- J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
- ValueType V = J->getPointer();
- bool ThisMayAlias = J->getInt();
-
- if (ThisMayAlias)
- MayAlias = true;
-
- // A load from a specific PseudoSourceValue. Add precise dependencies.
- MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
- ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
- if (I != IE)
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes, 0, true);
- if (ThisMayAlias)
- AliasMemUses[V].push_back(SU);
- else
- NonAliasMemUses[V].push_back(SU);
+ // Map this load to V.
+ (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
}
- // Add dependencies on alias and barrier chains, if needed.
- if (MayAlias && AliasChain)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
- RejectMemNodes);
- if (MayAlias)
- // This call must come after calls to addChainDependency() since it
- // consumes the 'RejectMemNodes' list that addChainDependency()
- // possibly adds to.
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
- RejectMemNodes, /*Latency=*/0);
- if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Barrier));
+ // The load may have dependencies to unanalyzable stores.
+ addChainDependencies(SU, Stores, UnknownValue);
}
}
+
+ // Reduce maps if they grow huge.
+ if (Stores.size() + Loads.size() >= HugeRegion) {
+ DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
+ reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
+ }
+ if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
+ DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
+ reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
+ }
}
+
if (DbgMI)
FirstDbgValue = DbgMI;
@@ -1166,7 +1114,84 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Uses.clear();
CurrentVRegDefs.clear();
CurrentVRegUses.clear();
- PendingLoads.clear();
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
+ PSV->printCustom(OS);
+ return OS;
+}
+
+void ScheduleDAGInstrs::Value2SUsMap::dump() {
+ for (auto &Itr : *this) {
+ if (Itr.first.is<const Value*>()) {
+ const Value *V = Itr.first.get<const Value*>();
+ if (isa<UndefValue>(V))
+ dbgs() << "Unknown";
+ else
+ V->printAsOperand(dbgs());
+ }
+ else if (Itr.first.is<const PseudoSourceValue*>())
+ dbgs() << Itr.first.get<const PseudoSourceValue*>();
+ else
+ llvm_unreachable("Unknown Value type.");
+
+ dbgs() << " : ";
+ dumpSUList(Itr.second);
+ }
+}
+
+/// Reduce maps in FIFO order, by N SUs. This is better than turning
+/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
+/// it avoids unnecessary edges between seen SUs above the new
+/// BarrierChain, and those below it.
+void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
+ Value2SUsMap &loads, unsigned N) {
+ DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
+ stores.dump();
+ dbgs() << "Loading SUnits:\n";
+ loads.dump());
+
+ // Insert all SU's NodeNums into a vector and sort it.
+ std::vector<unsigned> NodeNums;
+ NodeNums.reserve(stores.size() + loads.size());
+ for (auto &I : stores)
+ for (auto *SU : I.second)
+ NodeNums.push_back(SU->NodeNum);
+ for (auto &I : loads)
+ for (auto *SU : I.second)
+ NodeNums.push_back(SU->NodeNum);
+ std::sort(NodeNums.begin(), NodeNums.end());
+
+ // The N last elements in NodeNums will be removed, and the SU with
+ // the lowest NodeNum of them will become the new BarrierChain to
+ // let the not yet seen SUs have a dependency to the removed SUs.
+ assert (N <= NodeNums.size());
+ SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
+ if (BarrierChain) {
+ // The aliasing and non-aliasing maps reduce independently of each
+ // other, but share a common BarrierChain. Check if the
+ // newBarrierChain is above the former one. If it is not, it may
+ // introduce a loop to use newBarrierChain, so keep the old one.
+ if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
+ BarrierChain->addPredBarrier(newBarrierChain);
+ BarrierChain = newBarrierChain;
+ DEBUG(dbgs() << "Inserting new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+ }
+ else
+ DEBUG(dbgs() << "Keeping old barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+ }
+ else
+ BarrierChain = newBarrierChain;
+
+ insertBarrierChain(stores);
+ insertBarrierChain(loads);
+
+ DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
+ stores.dump();
+ dbgs() << "Loading SUnits:\n";
+ loads.dump());
}
/// \brief Initialize register live-range state for updating kills.
@@ -1190,7 +1215,8 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
/// operands, then we also need to propagate that to any instructions inside
/// the bundle which had the same kill state.
static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
- bool NewKillState) {
+ bool NewKillState,
+ const TargetRegisterInfo *TRI) {
if (MI->getOpcode() != TargetOpcode::BUNDLE)
return;
@@ -1199,30 +1225,13 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
// might set it on too many operands. We will clear as many flags as we
// can though.
MachineBasicBlock::instr_iterator Begin = MI->getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ MachineBasicBlock::instr_iterator End = getBundleEnd(*MI);
while (Begin != End) {
- for (MachineOperand &MO : (--End)->operands()) {
- if (!MO.isReg() || MO.isDef() || Reg != MO.getReg())
- continue;
-
- // DEBUG_VALUE nodes do not contribute to code generation and should
- // always be ignored. Failure to do so may result in trying to modify
- // KILL flags on DEBUG_VALUE nodes, which is distressing.
- if (MO.isDebug())
- continue;
-
- // If the register has the internal flag then it could be killing an
- // internal def of the register. In this case, just skip. We only want
- // to toggle the flag on operands visible outside the bundle.
- if (MO.isInternalRead())
- continue;
-
- if (MO.isKill() == NewKillState)
- continue;
- MO.setIsKill(NewKillState);
- if (NewKillState)
- return;
- }
+ if (NewKillState) {
+ if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+ return;
+ } else
+ (--End)->clearRegisterKills(Reg, TRI);
}
}
@@ -1230,21 +1239,21 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
// Setting kill flag...
if (!MO.isKill()) {
MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true);
+ toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
return false;
}
// If MO itself is live, clear the kill flag...
if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false);
+ toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
return false;
}
// If any subreg of MO is live, then create an imp-def for that
// subreg and keep MO marked as killed.
MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false);
+ toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
MachineInstrBuilder MIB(MF, MI);
@@ -1257,7 +1266,7 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
if(AllDead) {
MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true);
+ toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
}
return false;
}
@@ -1275,15 +1284,15 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
unsigned Count = MBB->size();
for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
I != E; --Count) {
- MachineInstr *MI = --I;
- if (MI->isDebugValue())
+ MachineInstr &MI = *--I;
+ if (MI.isDebugValue())
continue;
// Update liveness. Registers that are defed but not used in this
// instruction are now dead. Mark register and all subregs as they
// are completely defined.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (MO.isRegMask())
LiveRegs.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg()) continue;
@@ -1291,7 +1300,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
if (Reg == 0) continue;
if (!MO.isDef()) continue;
// Ignore two-addr defs.
- if (MI->isRegTiedToUseOperand(i)) continue;
+ if (MI.isRegTiedToUseOperand(i)) continue;
// Repeat for reg and all subregs.
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -1303,8 +1312,8 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// register is used multiple times we only set the kill flag on
// the first use. Don't set kill flags on undef operands.
killedRegs.reset();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
@@ -1329,13 +1338,15 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
if (MO.isKill() != kill) {
DEBUG(dbgs() << "Fixing " << MO << " in ");
// Warning: toggleKillFlag may invalidate MO.
- toggleKillFlag(MI, MO);
- DEBUG(MI->dump());
- DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {
- MachineBasicBlock::instr_iterator Begin = MI->getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
- while (++Begin != End)
- DEBUG(Begin->dump());
+ toggleKillFlag(&MI, MO);
+ DEBUG(MI.dump());
+ DEBUG({
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ MachineBasicBlock::instr_iterator Begin = MI.getIterator();
+ MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ while (++Begin != End)
+ DEBUG(Begin->dump());
+ }
});
}
@@ -1344,8 +1355,8 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// Mark any used register (that is not using undef) and subregs as
// now live...
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
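
For schedulers built on ScheduleDAGInstrs, the externally visible change in this patch is the extra LiveIntervals* parameter to buildSchedGraph() and the TrackLaneMasks handling that depends on it. Below is a minimal call-site sketch for the new signature; only the buildSchedGraph() parameters come from the patch above, while the wrapper function and variable names are illustrative placeholders.

// Sketch: driving the new buildSchedGraph() signature that takes a
// LiveIntervals pointer. Assumes the usual LLVM CodeGen headers; the
// wrapper and its parameter names are placeholders, not tree identifiers.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"

using namespace llvm;

static void buildGraphForRegion(ScheduleDAGInstrs &DAG, AliasAnalysis *AA,
                                RegPressureTracker *RPTracker,
                                PressureDiffs *PDiffs, LiveIntervals *LIS) {
  // Per the patch, LIS is only consulted when TrackLaneMasks is true (to get
  // each MI's SlotIndex and adjust lane liveness), so passing nullptr is only
  // safe when TrackLaneMasks is false.
  DAG.buildSchedGraph(AA, RPTracker, PDiffs, LIS, /*TrackLaneMasks=*/true);
}

Separately, the two hidden options added here, -dag-maps-huge-region and -dag-maps-reduction-size, are ordinary cl::opt flags, so they should be reachable on the command line (directly with llc, or through clang via -mllvm) when compile time of very large scheduling regions becomes a concern.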