summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
diff options
context:
space:
mode:
authordim <dim@FreeBSD.org>2014-11-24 17:02:24 +0000
committerdim <dim@FreeBSD.org>2014-11-24 17:02:24 +0000
commit2c8643c6396b0a3db33430cf9380e70bbb9efce0 (patch)
tree4df130b28021d86e13bf4565ef58c1c5a5e093b4 /contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
parent678318cd20f7db4e6c6b85d83fe00fa327b04fca (diff)
parente27feadae0885aa074df58ebfda2e7a7f7a7d590 (diff)
downloadFreeBSD-src-2c8643c6396b0a3db33430cf9380e70bbb9efce0.zip
FreeBSD-src-2c8643c6396b0a3db33430cf9380e70bbb9efce0.tar.gz
Merge llvm 3.5.0 release from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp')
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp626
1 files changed, 566 insertions, 60 deletions
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index c08d955..dee990c 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
@@ -35,17 +34,23 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");
@@ -59,6 +64,34 @@ SplitSpillMode("split-spill-mode", cl::Hidden,
clEnumValEnd),
cl::init(SplitEditor::SM_Partition));
+static cl::opt<unsigned>
+LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
+ cl::desc("Last chance recoloring max depth"),
+ cl::init(5));
+
+static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
+ "lcr-max-interf", cl::Hidden,
+ cl::desc("Last chance recoloring maximum number of considered"
+ " interference at a time"),
+ cl::init(8));
+
+static cl::opt<bool>
+ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
+ cl::desc("Exhaustive Search for registers bypassing the depth "
+ "and interference cutoffs of last chance recoloring"));
+
+static cl::opt<bool> EnableLocalReassignment(
+ "enable-local-reassign", cl::Hidden,
+ cl::desc("Local reassignment can yield better allocation decisions, but "
+ "may be compile time intensive"),
+ cl::init(false));
+
+// FIXME: Find a good default for this flag and remove the flag.
+static cl::opt<unsigned>
+CSRFirstTimeCost("regalloc-csr-first-time-cost",
+ cl::desc("Cost for first time use of callee-saved register."),
+ cl::init(0), cl::Hidden);
+
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@@ -66,10 +99,19 @@ namespace {
class RAGreedy : public MachineFunctionPass,
public RegAllocBase,
private LiveRangeEdit::Delegate {
+ // Convenient shortcuts.
+ typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
+ typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
+ typedef SmallSet<unsigned, 16> SmallVirtRegSet;
// context
MachineFunction *MF;
+ // Shortcuts to some useful interface.
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ RegisterClassInfo RCI;
+
// analyses
SlotIndexes *Indexes;
MachineBlockFrequencyInfo *MBFI;
@@ -80,8 +122,8 @@ class RAGreedy : public MachineFunctionPass,
LiveDebugVariables *DebugVars;
// state
- OwningPtr<Spiller> SpillerInstance;
- std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+ std::unique_ptr<Spiller> SpillerInstance;
+ PQueue Queue;
unsigned NextCascade;
// Live ranges pass through a number of stages as we try to allocate them.
@@ -120,6 +162,22 @@ class RAGreedy : public MachineFunctionPass,
RS_Done
};
+ // Enum CutOffStage to keep a track whether the register allocation failed
+ // because of the cutoffs encountered in last chance recoloring.
+ // Note: This is used as bitmask. New value should be next power of 2.
+ enum CutOffStage {
+ // No cutoffs encountered
+ CO_None = 0,
+
+ // lcr-max-depth cutoff encountered
+ CO_Depth = 1,
+
+ // lcr-max-interf cutoff encountered
+ CO_Interf = 2
+ };
+
+ uint8_t CutOffInfo;
+
#ifndef NDEBUG
static const char *const StageName[];
#endif
@@ -160,20 +218,23 @@ class RAGreedy : public MachineFunctionPass,
unsigned BrokenHints; ///< Total number of broken hints.
float MaxWeight; ///< Maximum spill weight evicted.
- EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+ EvictionCost(): BrokenHints(0), MaxWeight(0) {}
bool isMax() const { return BrokenHints == ~0u; }
+ void setMax() { BrokenHints = ~0u; }
+
+ void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
+
bool operator<(const EvictionCost &O) const {
- if (BrokenHints != O.BrokenHints)
- return BrokenHints < O.BrokenHints;
- return MaxWeight < O.MaxWeight;
+ return std::tie(BrokenHints, MaxWeight) <
+ std::tie(O.BrokenHints, O.MaxWeight);
}
};
// splitting state.
- OwningPtr<SplitAnalysis> SA;
- OwningPtr<SplitEditor> SE;
+ std::unique_ptr<SplitAnalysis> SA;
+ std::unique_ptr<SplitEditor> SE;
/// Cached per-block interference maps
InterferenceCache IntfCache;
@@ -217,43 +278,54 @@ class RAGreedy : public MachineFunctionPass,
}
};
- /// Candidate info for for each PhysReg in AllocationOrder.
+ /// Candidate info for each PhysReg in AllocationOrder.
/// This vector never shrinks, but grows to the size of the largest register
/// class.
SmallVector<GlobalSplitCandidate, 32> GlobalCand;
- enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u };
+ enum : unsigned { NoCand = ~0u };
/// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
/// NoCand which indicates the stack interval.
SmallVector<unsigned, 32> BundleCand;
+ /// Callee-save register cost, calculated once per machine function.
+ BlockFrequency CSRCost;
+
+ /// Run or not the local reassignment heuristic. This information is
+ /// obtained from the TargetSubtargetInfo.
+ bool EnableLocalReassign;
+
public:
RAGreedy();
/// Return the pass name.
- virtual const char* getPassName() const {
+ const char* getPassName() const override {
return "Greedy Register Allocator";
}
/// RAGreedy analysis usage.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual void releaseMemory();
- virtual Spiller &spiller() { return *SpillerInstance; }
- virtual void enqueue(LiveInterval *LI);
- virtual LiveInterval *dequeue();
- virtual unsigned selectOrSplit(LiveInterval&,
- SmallVectorImpl<unsigned>&);
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+ Spiller &spiller() override { return *SpillerInstance; }
+ void enqueue(LiveInterval *LI) override;
+ LiveInterval *dequeue() override;
+ unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
/// Perform register allocation.
- virtual bool runOnMachineFunction(MachineFunction &mf);
+ bool runOnMachineFunction(MachineFunction &mf) override;
static char ID;
private:
- bool LRE_CanEraseVirtReg(unsigned);
- void LRE_WillShrinkVirtReg(unsigned);
- void LRE_DidCloneVirtReg(unsigned, unsigned);
+ unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
+ SmallVirtRegSet &, unsigned = 0);
+
+ bool LRE_CanEraseVirtReg(unsigned) override;
+ void LRE_WillShrinkVirtReg(unsigned) override;
+ void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+ void enqueue(PQueue &CurQueue, LiveInterval *LI);
+ LiveInterval *dequeue(PQueue &CurQueue);
BlockFrequency calcSpillCost();
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
@@ -268,6 +340,9 @@ private:
bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
void evictInterference(LiveInterval&, unsigned,
SmallVectorImpl<unsigned>&);
+ bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates,
+ const SmallVirtRegSet &FixedRegisters);
unsigned tryAssign(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
@@ -275,6 +350,21 @@ private:
SmallVectorImpl<unsigned>&, unsigned = ~0u);
unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
+ /// Calculate cost of region splitting.
+ unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands, bool IgnoreCSR);
+ /// Perform region splitting.
+ unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+ bool HasCompact,
+ SmallVectorImpl<unsigned> &NewVRegs);
+ /// Check other options before using a callee-saved register for the first
+ /// time.
+ unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
+ unsigned PhysReg, unsigned &CostPerUseLimit,
+ SmallVectorImpl<unsigned> &NewVRegs);
+ void initializeCSRCost();
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
@@ -283,6 +373,11 @@ private:
SmallVectorImpl<unsigned>&);
unsigned trySplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
+ unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<unsigned> &,
+ SmallVirtRegSet &, unsigned);
+ bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
+ SmallVirtRegSet &, unsigned);
};
} // end anonymous namespace
@@ -301,7 +396,7 @@ const char *const RAGreedy::StageName[] = {
// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
-const float Hysteresis = 0.98f;
+const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
FunctionPass* llvm::createGreedyRegisterAllocator() {
@@ -391,12 +486,14 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
}
void RAGreedy::releaseMemory() {
- SpillerInstance.reset(0);
+ SpillerInstance.reset();
ExtraRegInfo.clear();
GlobalCand.clear();
}
-void RAGreedy::enqueue(LiveInterval *LI) {
+void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }
+
+void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Prioritize live ranges by size, assigning larger ranges first.
// The queue holds (size, reg) pairs.
const unsigned Size = LI->getSize();
@@ -414,12 +511,25 @@ void RAGreedy::enqueue(LiveInterval *LI) {
// everything else has been allocated.
Prio = Size;
} else {
- if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() &&
+ // Giant live ranges fall back to the global assignment heuristic, which
+ // prevents excessive spilling in pathological cases.
+ bool ReverseLocal = TRI->reverseLocalAssignment();
+ bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() &&
+ (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs());
+
+ if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
// Allocate original local ranges in linear instruction order. Since they
// are singly defined, this produces optimal coloring in the absence of
// global interference and other constraints.
- Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
+ if (!ReverseLocal)
+ Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
+ else {
+ // Allocating bottom up may allow many short LRGs to be assigned first
+ // to one of the cheap registers. This could be much faster for very
+ // large blocks on targets with many physical registers.
+ Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex());
+ }
}
else {
// Allocate global and split ranges in long->short order. Long ranges that
@@ -436,14 +546,16 @@ void RAGreedy::enqueue(LiveInterval *LI) {
}
// The virtual register number is a tie breaker for same-sized ranges.
// Give lower vreg numbers higher priority to assign them first.
- Queue.push(std::make_pair(Prio, ~Reg));
+ CurQueue.push(std::make_pair(Prio, ~Reg));
}
-LiveInterval *RAGreedy::dequeue() {
- if (Queue.empty())
- return 0;
- LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
- Queue.pop();
+LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
+
+LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
+ if (CurQueue.empty())
+ return nullptr;
+ LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
+ CurQueue.pop();
return LI;
}
@@ -471,7 +583,8 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
if (Order.isHint(Hint)) {
DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
- EvictionCost MaxCost(1);
+ EvictionCost MaxCost;
+ MaxCost.setBrokenHints(1);
if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
evictInterference(VirtReg, Hint, NewVRegs);
return Hint;
@@ -543,11 +656,15 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
if (CanSplit && IsHint && !BreaksHint)
return true;
- return A.weight > B.weight;
+ if (A.weight > B.weight) {
+ DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
+ return true;
+ }
+ return false;
}
/// canEvictInterference - Return true if all interferences between VirtReg and
-/// PhysReg can be evicted. When OnlyCheap is set, don't do anything
+/// PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
@@ -618,16 +735,16 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
return false;
if (Urgent)
continue;
+ // Apply the eviction policy for non-urgent evictions.
+ if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
// If !MaxCost.isMax(), then we're just looking for a cheap register.
// Evicting another local live range in this case could lead to suboptimal
// coloring.
if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
- !canReassign(*Intf, PhysReg)) {
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
return false;
}
- // Finally, apply the eviction policy for non-urgent evictions.
- if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
- return false;
}
}
MaxCost = Cost;
@@ -685,7 +802,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
// Keep track of the cheapest interference seen so far.
- EvictionCost BestCost(~0u);
+ EvictionCost BestCost;
+ BestCost.setMax();
unsigned BestPhys = 0;
unsigned OrderLimit = Order.getOrder().size();
@@ -713,7 +831,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
}
Order.rewind();
- while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) {
+ while (unsigned PhysReg = Order.next(OrderLimit)) {
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1172,9 +1290,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs) {
unsigned NumCands = 0;
- unsigned BestCand = NoCand;
BlockFrequency BestCost;
- SmallVector<unsigned, 8> UsedCands;
// Check if we can split this live range around a compact region.
bool HasCompact = calcCompactRegion(GlobalCand.front());
@@ -1186,11 +1302,33 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// No benefit from the compact region, our fallback will be per-block
// splitting. Make sure we find a solution that is cheaper than spilling.
BestCost = calcSpillCost();
- DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+ DEBUG(dbgs() << "Cost of isolating all blocks = ";
+ MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
}
+ unsigned BestCand =
+ calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
+ false/*IgnoreCSR*/);
+
+ // No solutions found, fall back to single block splitting.
+ if (!HasCompact && BestCand == NoCand)
+ return 0;
+
+ return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
+}
+
+unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands,
+ bool IgnoreCSR) {
+ unsigned BestCand = NoCand;
Order.rewind();
while (unsigned PhysReg = Order.next()) {
+ if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+ if (IgnoreCSR && !MRI->isPhysRegUsed(CSR))
+ continue;
+
// Discard bad candidates before we run out of interference cache cursors.
// This will only affect register classes with a lot of registers (>32).
if (NumCands == IntfCache.getMaxCursors()) {
@@ -1220,7 +1358,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
continue;
}
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
+ MBFI->printBlockFreq(dbgs(), Cost));
if (Cost >= BestCost) {
DEBUG({
if (BestCand == NoCand)
@@ -1243,7 +1382,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
Cost += calcGlobalSplitCost(Cand);
DEBUG({
- dbgs() << ", total = " << Cost << " with bundles";
+ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
+ << " with bundles";
for (int i = Cand.LiveBundles.find_first(); i>=0;
i = Cand.LiveBundles.find_next(i))
dbgs() << " EB#" << i;
@@ -1255,11 +1395,13 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
}
++NumCands;
}
+ return BestCand;
+}
- // No solutions found, fall back to single block splitting.
- if (!HasCompact && BestCand == NoCand)
- return 0;
-
+unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+ bool HasCompact,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVector<unsigned, 8> UsedCands;
// Prepare split editor.
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
@@ -1348,6 +1490,22 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Per-Instruction Splitting
//===----------------------------------------------------------------------===//
+/// Get the number of allocatable registers that match the constraints of \p Reg
+/// on \p MI and that are also in \p SuperRC.
+static unsigned getNumAllocatableRegsForConstraints(
+ const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
+ const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
+ const RegisterClassInfo &RCI) {
+ assert(SuperRC && "Invalid register class");
+
+ const TargetRegisterClass *ConstrainedRC =
+ MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
+ /* ExploreBundle */ true);
+ if (!ConstrainedRC)
+ return 0;
+ return RCI.getNumAllocatableRegs(ConstrainedRC);
+}
+
/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
@@ -1358,8 +1516,9 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs) {
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
// There is no point to this if there are no larger sub-classes.
- if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+ if (!RegClassInfo.isProperSubClass(CurRC))
return 0;
// Always enable split spill mode, since we're effectively spilling to a
@@ -1373,10 +1532,18 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
- // Split around every non-copy instruction.
+ const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC);
+ unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
+ // Split around every non-copy instruction if this split will relax
+ // the constraints on the virtual register.
+ // Otherwise, splitting just inserts uncoalescable copies that do not help
+ // the allocation.
for (unsigned i = 0; i != Uses.size(); ++i) {
if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
- if (MI->isFullCopy()) {
+ if (MI->isFullCopy() ||
+ SuperRCNumAllocatableRegs ==
+ getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
+ TRI, RCI)) {
DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
continue;
}
@@ -1571,7 +1738,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
const float blockFreq =
SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
- (1.0f / BlockFrequency::getEntryFrequency());
+ (1.0f / MBFI->getEntryFreq());
SmallVector<float, 8> GapWeight;
Order.rewind();
@@ -1759,6 +1926,222 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
return tryBlockSplit(VirtReg, Order, NewVRegs);
}
+//===----------------------------------------------------------------------===//
+// Last Chance Recoloring
+//===----------------------------------------------------------------------===//
+
+/// mayRecolorAllInterferences - Check if the virtual registers that
+/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
+/// recolored to free \p PhysReg.
+/// When true is returned, \p RecoloringCandidates has been augmented with all
+/// the live intervals that need to be recolored in order to free \p PhysReg
+/// for \p VirtReg.
+/// \p FixedRegisters contains all the virtual registers that cannot be
+/// recolored.
+bool
+RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates,
+ const SmallVirtRegSet &FixedRegisters) {
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
+
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ // If there is LastChanceRecoloringMaxInterference or more interferences,
+ // chances are one would not be recolorable.
+ if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
+ LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
+ DEBUG(dbgs() << "Early abort: too many interferences.\n");
+ CutOffInfo |= CO_Interf;
+ return false;
+ }
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ // If Intf is done and sit on the same register class as VirtReg,
+ // it would not be recolorable as it is in the same state as VirtReg.
+ if ((getStage(*Intf) == RS_Done &&
+ MRI->getRegClass(Intf->reg) == CurRC) ||
+ FixedRegisters.count(Intf->reg)) {
+ DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n");
+ return false;
+ }
+ RecoloringCandidates.insert(Intf);
+ }
+ }
+ return true;
+}
+
+/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
+/// its interferences.
+/// Last chance recoloring chooses a color for \p VirtReg and recolors every
+/// virtual register that was using it. The recoloring process may recursively
+/// use the last chance recoloring. Therefore, when a virtual register has been
+/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
+/// be last-chance-recolored again during this recoloring "session".
+/// E.g.,
+/// Let
+/// vA can use {R1, R2 }
+/// vB can use { R2, R3}
+/// vC can use {R1 }
+/// Where vA, vB, and vC cannot be split anymore (they are reloads for
+/// instance) and they all interfere.
+///
+/// vA is assigned R1
+/// vB is assigned R2
+/// vC tries to evict vA but vA is already done.
+/// Regular register allocation fails.
+///
+/// Last chance recoloring kicks in:
+/// vC does as if vA was evicted => vC uses R1.
+/// vC is marked as fixed.
+/// vA needs to find a color.
+/// None are available.
+/// vA cannot evict vC: vC is a fixed virtual register now.
+/// vA does as if vB was evicted => vA uses R2.
+/// vB needs to find a color.
+/// R3 is available.
+/// Recoloring => vC = R1, vA = R2, vB = R3
+///
+/// \p Order defines the preferred allocation order for \p VirtReg.
+/// \p NewRegs will contain any new virtual register that have been created
+/// (split, spill) during the process and that must be assigned.
+/// \p FixedRegisters contains all the virtual registers that cannot be
+/// recolored.
+/// \p Depth gives the current depth of the last chance recoloring.
+/// \return a physical register that can be used for VirtReg or ~0u if none
+/// exists.
+unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ unsigned Depth) {
+ DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
+ // Ranges must be Done.
+ assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
+ "Last chance recoloring should really be last chance");
+ // Set the max depth to LastChanceRecoloringMaxDepth.
+ // We may want to reconsider that if we end up with a too large search space
+ // for target with hundreds of registers.
+ // Indeed, in that case we may want to cut the search space earlier.
+ if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
+ DEBUG(dbgs() << "Abort because max depth has been reached.\n");
+ CutOffInfo |= CO_Depth;
+ return ~0u;
+ }
+
+ // Set of Live intervals that will need to be recolored.
+ SmallLISet RecoloringCandidates;
+ // Record the original mapping virtual register to physical register in case
+ // the recoloring fails.
+ DenseMap<unsigned, unsigned> VirtRegToPhysReg;
+ // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
+ // this recoloring "session".
+ FixedRegisters.insert(VirtReg.reg);
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
+ << PrintReg(PhysReg, TRI) << '\n');
+ RecoloringCandidates.clear();
+ VirtRegToPhysReg.clear();
+
+ // It is only possible to recolor virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) >
+ LiveRegMatrix::IK_VirtReg) {
+ DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n");
+
+ continue;
+ }
+
+ // Early give up on this PhysReg if it is obvious we cannot recolor all
+ // the interferences.
+ if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
+ FixedRegisters)) {
+ DEBUG(dbgs() << "Some inteferences cannot be recolored.\n");
+ continue;
+ }
+
+ // RecoloringCandidates contains all the virtual registers that interfer
+ // with VirtReg on PhysReg (or one of its aliases).
+ // Enqueue them for recoloring and perform the actual recoloring.
+ PQueue RecoloringQueue;
+ for (SmallLISet::iterator It = RecoloringCandidates.begin(),
+ EndIt = RecoloringCandidates.end();
+ It != EndIt; ++It) {
+ unsigned ItVirtReg = (*It)->reg;
+ enqueue(RecoloringQueue, *It);
+ assert(VRM->hasPhys(ItVirtReg) &&
+ "Interferences are supposed to be with allocated vairables");
+
+ // Record the current allocation.
+ VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
+ // unset the related struct.
+ Matrix->unassign(**It);
+ }
+
+ // Do as if VirtReg was assigned to PhysReg so that the underlying
+ // recoloring has the right information about the interferes and
+ // available colors.
+ Matrix->assign(VirtReg, PhysReg);
+
+ // Save the current recoloring state.
+ // If we cannot recolor all the interferences, we will have to start again
+ // at this point for the next physical register.
+ SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
+ if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
+ Depth)) {
+ // Do not mess up with the global assignment process.
+ // I.e., VirtReg must be unassigned.
+ Matrix->unassign(VirtReg);
+ return PhysReg;
+ }
+
+ DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
+ << PrintReg(PhysReg, TRI) << '\n');
+
+ // The recoloring attempt failed, undo the changes.
+ FixedRegisters = SaveFixedRegisters;
+ Matrix->unassign(VirtReg);
+
+ for (SmallLISet::iterator It = RecoloringCandidates.begin(),
+ EndIt = RecoloringCandidates.end();
+ It != EndIt; ++It) {
+ unsigned ItVirtReg = (*It)->reg;
+ if (VRM->hasPhys(ItVirtReg))
+ Matrix->unassign(**It);
+ Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]);
+ }
+ }
+
+ // Last chance recoloring did not worked either, give up.
+ return ~0u;
+}
+
+/// tryRecoloringCandidates - Try to assign a new color to every register
+/// in \RecoloringQueue.
+/// \p NewRegs will contain any new virtual register created during the
+/// recoloring process.
+/// \p FixedRegisters[in/out] contains all the registers that have been
+/// recolored.
+/// \return true if all virtual registers in RecoloringQueue were successfully
+/// recolored, false otherwise.
+bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
+ SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ unsigned Depth) {
+ while (!RecoloringQueue.empty()) {
+ LiveInterval *LI = dequeue(RecoloringQueue);
+ DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
+ unsigned PhysReg;
+ PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
+ if (PhysReg == ~0u || !PhysReg)
+ return false;
+ DEBUG(dbgs() << "Recoloring of " << *LI
+ << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
+ Matrix->assign(*LI, PhysReg);
+ FixedRegisters.insert(LI->reg);
+ }
+ return true;
+}
//===----------------------------------------------------------------------===//
// Main Entry Point
@@ -1766,10 +2149,122 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<unsigned> &NewVRegs) {
+ CutOffInfo = CO_None;
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ SmallVirtRegSet FixedRegisters;
+ unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ if (Reg == ~0U && (CutOffInfo != CO_None)) {
+ uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
+ if (CutOffEncountered == CO_Depth)
+ Ctx.emitError("register allocation failed: maximum depth for recoloring "
+ "reached. Use -fexhaustive-register-search to skip "
+ "cutoffs");
+ else if (CutOffEncountered == CO_Interf)
+ Ctx.emitError("register allocation failed: maximum interference for "
+ "recoloring reached. Use -fexhaustive-register-search "
+ "to skip cutoffs");
+ else if (CutOffEncountered == (CO_Depth | CO_Interf))
+ Ctx.emitError("register allocation failed: maximum interference and "
+ "depth for recoloring reached. Use "
+ "-fexhaustive-register-search to skip cutoffs");
+ }
+ return Reg;
+}
+
+/// Using a CSR for the first time has a cost because it causes push|pop
+/// to be added to prologue|epilogue. Splitting a cold section of the live
+/// range can have lower cost than using the CSR for the first time;
+/// Spilling a live range in the cold path can have lower cost than using
+/// the CSR for the first time. Returns the physical register if we decide
+/// to use the CSR; otherwise return 0.
+unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ unsigned PhysReg,
+ unsigned &CostPerUseLimit,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
+ // We choose spill over using the CSR for the first time if the spill cost
+ // is lower than CSRCost.
+ SA->analyze(&VirtReg);
+ if (calcSpillCost() >= CSRCost)
+ return PhysReg;
+
+ // We are going to spill, set CostPerUseLimit to 1 to make sure that
+ // we will not use a callee-saved register in tryEvict.
+ CostPerUseLimit = 1;
+ return 0;
+ }
+ if (getStage(VirtReg) < RS_Split) {
+ // We choose pre-splitting over using the CSR for the first time if
+ // the cost of splitting is lower than CSRCost.
+ SA->analyze(&VirtReg);
+ unsigned NumCands = 0;
+ BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
+ unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
+ NumCands, true /*IgnoreCSR*/);
+ if (BestCand == NoCand)
+ // Use the CSR if we can't find a region split below CSRCost.
+ return PhysReg;
+
+ // Perform the actual pre-splitting.
+ doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
+ return 0;
+ }
+ return PhysReg;
+}
+
+void RAGreedy::initializeCSRCost() {
+ // We use the larger one out of the command-line option and the value report
+ // by TRI.
+ CSRCost = BlockFrequency(
+ std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
+ if (!CSRCost.getFrequency())
+ return;
+
+ // Raw cost is relative to Entry == 2^14; scale it appropriately.
+ uint64_t ActualEntry = MBFI->getEntryFreq();
+ if (!ActualEntry) {
+ CSRCost = 0;
+ return;
+ }
+ uint64_t FixedEntry = 1 << 14;
+ if (ActualEntry < FixedEntry)
+ CSRCost *= BranchProbability(ActualEntry, FixedEntry);
+ else if (ActualEntry <= UINT32_MAX)
+ // Invert the fraction and divide.
+ CSRCost /= BranchProbability(FixedEntry, ActualEntry);
+ else
+ // Can't use BranchProbability in general, since it takes 32-bit numbers.
+ CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
+}
+
+unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
+ SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ unsigned Depth) {
+ unsigned CostPerUseLimit = ~0u;
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
- if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
- return PhysReg;
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
+ // We check other options if we are using a CSR for the first time.
+ bool CSRFirstUse = false;
+ if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+ if (!MRI->isPhysRegUsed(CSR))
+ CSRFirstUse = true;
+
+ // When NewVRegs is not empty, we may have made decisions such as evicting
+ // a virtual register, go with the earlier decisions and use the physical
+ // register.
+ if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) {
+ unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
+ CostPerUseLimit, NewVRegs);
+ if (CSRReg || !NewVRegs.empty())
+ // Return now if we decide to use a CSR or create new vregs due to
+ // pre-splitting.
+ return CSRReg;
+ } else
+ return PhysReg;
+ }
LiveRangeStage Stage = getStage(VirtReg);
DEBUG(dbgs() << StageName[Stage]
@@ -1779,7 +2274,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// queue. The RS_Split ranges already failed to do this, and they should not
// get a second chance until they have been split.
if (Stage != RS_Split)
- if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+ if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit))
return PhysReg;
assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
@@ -1797,7 +2292,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class wil report it.
if (Stage >= RS_Done || !VirtReg.isSpillable())
- return ~0u;
+ return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
+ Depth);
// Try splitting VirtReg or interferences.
unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
@@ -1823,6 +2319,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
<< "********** Function: " << mf.getName() << '\n');
MF = &mf;
+ const TargetMachine &TM = MF->getTarget();
+ TRI = TM.getRegisterInfo();
+ TII = TM.getInstrInfo();
+ RCI.runOnMachineFunction(mf);
+
+ EnableLocalReassign = EnableLocalReassignment ||
+ TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel());
+
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
@@ -1838,6 +2342,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
+ initializeCSRCost();
+
calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);
DEBUG(LIS->dump());
OpenPOWER on IntegriCloud